diff --git a/backends/nxp/backend/neutron_converter_manager.py b/backends/nxp/backend/neutron_converter_manager.py index 90095527364..2ad31f044cb 100644 --- a/backends/nxp/backend/neutron_converter_manager.py +++ b/backends/nxp/backend/neutron_converter_manager.py @@ -67,6 +67,7 @@ def convert( target: str, delegation_tag: str, fetch_constants_to_sram: bool = False, + use_new_flow_neutron_c: bool = False, ) -> bytes: """ Call Neutron Converter. @@ -75,6 +76,7 @@ def convert( :param target: The target platform. :param delegation_tag: The delegation tag of model partition. :param fetch_constants_to_sram: Add microcode that fetches weights from external memory. + :param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C which improves INT8 operator support. This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers). :return: TFLite model with Neutron microcode as bytes. @@ -90,6 +92,7 @@ def convert( ) cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code + cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c # Try to use multiprocessing for isolation, but fall back to direct execution # if the environment doesn't support it (e.g., in sandcastle/build environments) diff --git a/backends/nxp/nxp_backend.py b/backends/nxp/nxp_backend.py index 38878465d58..5fc1f512165 100644 --- a/backends/nxp/nxp_backend.py +++ b/backends/nxp/nxp_backend.py @@ -50,6 +50,7 @@ def __init__(self): self.use_neutron_for_format_conversion = True self.fetch_constants_to_sram = False self.dump_kernel_selection_code = False + self.use_new_flow_neutron_c = False def _replace_colons(self, operator: str) -> str: """ @@ -65,20 +66,21 @@ def neutron_compile_spec( use_neutron_for_format_conversion: bool = True, fetch_constants_to_sram: bool = False, dump_kernel_selection_code: bool = False, - ): - """ - Generate compile spec for Neutron 
NPU - - Args: - config: Neutron accelerator configuration, e.g. "imxrt700" - extra_flags: Extra flags for the Neutron compiler - operators_not_to_delegate: List of operators that should not be delegated - use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to + use_new_flow_neutron_c: bool = False, + ) -> "NeutronCompileSpecBuilder": + """Generate compile spec for Neutron NPU + + :param config: Neutron accelerator configuration, e.g. "imxrt700" + :param extra_flags: Extra flags for the Neutron compiler + :param operators_not_to_delegate: List of operators that should not be delegated + :param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to ensure that the IO matches the executorch partition, which will be delegated to Neutron. - fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights + :param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights from FLASH to SRAM. This should be used when the whole model does not fit into SRAM. - dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code. + :param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code. + :param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C which improves INT8 operator support. 
+ :return: self for method chaining """ self.config = NeutronTargetSpec(config) @@ -100,6 +102,7 @@ def neutron_compile_spec( self.use_neutron_for_format_conversion = use_neutron_for_format_conversion self.fetch_constants_to_sram = fetch_constants_to_sram self.dump_kernel_selection_code = dump_kernel_selection_code + self.use_new_flow_neutron_c = use_new_flow_neutron_c return self @@ -128,6 +131,10 @@ def build(self): "dump_kernel_selection_code", f"{self.dump_kernel_selection_code}".encode(), ), + CompileSpec( + "use_new_flow_neutron_c", + f"{self.use_new_flow_neutron_c}".encode(), + ), ] return self.compile_spec @@ -141,6 +148,7 @@ def generate_neutron_compile_spec( use_neutron_for_format_conversion: bool = True, fetch_constants_to_sram: bool = False, dump_kernel_selection_code: bool = False, + use_new_flow_neutron_c: bool = False, ) -> List[CompileSpec]: return ( NeutronCompileSpecBuilder() @@ -151,6 +159,7 @@ def generate_neutron_compile_spec( use_neutron_for_format_conversion=use_neutron_for_format_conversion, fetch_constants_to_sram=fetch_constants_to_sram, dump_kernel_selection_code=dump_kernel_selection_code, + use_new_flow_neutron_c=use_new_flow_neutron_c, ) .build() ) @@ -175,6 +184,7 @@ def preprocess( # noqa C901 use_neutron_for_format_conversion = None fetch_constants_to_sram = False dump_kernel_selection_code = None + use_new_flow_neutron_c = False for spec in compile_spec: if spec.key == "output_format": output_format = spec.value.decode() @@ -188,6 +198,8 @@ def preprocess( # noqa C901 fetch_constants_to_sram = spec.value.decode() == "True" if spec.key == "dump_kernel_selection_code": dump_kernel_selection_code = spec.value.decode() == "True" + if spec.key == "use_new_flow_neutron_c": + use_new_flow_neutron_c = spec.value.decode() == "True" # Check that the output format is set in the compile spec if not output_format: @@ -220,7 +232,11 @@ def preprocess( # noqa C901 ) neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert( - 
tflite_model, target, delegation_tag, fetch_constants_to_sram + tflite_model, + target, + delegation_tag, + fetch_constants_to_sram, + use_new_flow_neutron_c, ) # Dump the tflite file if logging level is enabled diff --git a/backends/nxp/requirements-eiq.txt b/backends/nxp/requirements-eiq.txt index ff457cbb55b..61e5c882c40 100644 --- a/backends/nxp/requirements-eiq.txt +++ b/backends/nxp/requirements-eiq.txt @@ -1,3 +1,3 @@ --index-url https://eiq.nxp.com/repository -eiq-neutron-sdk==3.0.1 +eiq-neutron-sdk==3.1.0 eiq_nsys diff --git a/backends/nxp/tests/executorch_pipeline.py b/backends/nxp/tests/executorch_pipeline.py index f1afbd2d0f7..bfe7aca0e27 100644 --- a/backends/nxp/tests/executorch_pipeline.py +++ b/backends/nxp/tests/executorch_pipeline.py @@ -130,6 +130,7 @@ def to_quantized_edge_program( use_quant_state_dict: bool = True, fetch_constants_to_sram: bool = False, dump_kernel_selection_code: bool = False, + use_new_flow_neutron_c: bool = False, ) -> EdgeProgramManager: _neutron_target_spec = NeutronTargetSpec(target) if get_quantizer_fn is None: @@ -160,6 +161,7 @@ def to_quantized_edge_program( use_neutron_for_format_conversion=use_neutron_for_format_conversion, fetch_constants_to_sram=fetch_constants_to_sram, dump_kernel_selection_code=dump_kernel_selection_code, + use_new_flow_neutron_c=use_new_flow_neutron_c, ) post_quant_state_dict = ( exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None diff --git a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py index df6baf15c21..097b8720169 100644 --- a/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py +++ b/backends/nxp/tests/ir/converter/node_converter/test_constant_pad_nd_converter.py @@ -168,7 +168,7 @@ def test_constant_pad_nd__delegation__formatless__supported_padding(use_qat): paddings = [0, 0, 1, 2, 3, 4] # The last dim is padded using 
the first 2 paddings. model = ConstantPadNDModule(paddings) exec_program = to_quantized_edge_program( - model, input_shape, use_qat=use_qat + model, input_shape, use_qat=use_qat, use_new_flow_neutron_c=True ).exported_program() # Make sure the `pad` was delegated. @@ -191,13 +191,12 @@ def test_constant_pad_nd__delegation__formatless__unsupported_padding(use_qat): ) -@pytest.mark.xfail(reason="Regression in Neutron SW 3.0.1 (AIR-14264)", strict=True) def test_constant_pad_nd__delegation__channels_first__supported_padding(use_qat): input_shape = (2, 4, 6, 8) # Channels first -> the second dim (4) will be padded. paddings = [1, 2, 3, 4, 0, 0] # The second dim is padded using the paddings[4:6]. model = ConstantPadNDConvModule(paddings) exec_program = to_quantized_edge_program( - model, input_shape, use_qat=use_qat + model, input_shape, use_qat=use_qat, use_new_flow_neutron_c=True ).exported_program() # Make sure the `pad` was delegated. diff --git a/backends/nxp/tests/test_neutron_converter_manager.py b/backends/nxp/tests/test_neutron_converter_manager.py index aea0afe0c6f..a787df5897c 100644 --- a/backends/nxp/tests/test_neutron_converter_manager.py +++ b/backends/nxp/tests/test_neutron_converter_manager.py @@ -3,7 +3,10 @@ # This source code is licensed under the BSD-style license found in the # LICENSE file in the root directory of this source tree. +import multiprocessing + import torch +from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext from executorch import exir from executorch.backends.nxp.backend.edge_program_converter import ( @@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker): assert len(neutron_model_prefetch) != len( neutron_model_regular ), "The weight prefetching flag does not make a difference!" 
+ + +def test_neutron_converter_with_experimental_mlir_flow(mocker): + model = LinearModule(True) + input_shape = (1, 1, 32, 32) + + process_spy = mocker.spy(multiprocessing, "Process") + to_quantized_edge_program( + model, input_shape, use_new_flow_neutron_c=True + ).exported_program() + + compilation_context = process_spy.call_args.kwargs["args"][2] + assert isinstance(compilation_context, CompilationContext) + assert compilation_context.compilationOpts.useNewFlowNeutronC diff --git a/docs/source/backends/nxp/nxp-overview.md b/docs/source/backends/nxp/nxp-overview.md index fac06e0e417..2bf66e28e5c 100644 --- a/docs/source/backends/nxp/nxp-overview.md +++ b/docs/source/backends/nxp/nxp-overview.md @@ -24,10 +24,10 @@ Among currently supported machine learning models are: - [MCUXpresso IDE](https://www.nxp.com/design/design-center/software/development-software/mcuxpresso-software-and-tools-/mcuxpresso-integrated-development-environment-ide:MCUXpresso-IDE) or [MCUXpresso Visual Studio Code extension](https://www.nxp.com/design/design-center/software/development-software/mcuxpresso-software-and-tools-/mcuxpresso-for-visual-studio-code:MCUXPRESSO-VSC) - [MCUXpresso SDK 25.12](https://mcuxpresso.nxp.com/mcuxsdk/25.12.00/html/index.html) -- eIQ Neutron SDK version 3.0.0, what you can download from eIQ PyPI: +- eIQ Neutron SDK version 3.1.0, which you can download from eIQ PyPI: ```commandline -$ pip install --index-url https://eiq.nxp.com/repository eiq_neutron_sdk==3.0.0 +$ pip install --index-url https://eiq.nxp.com/repository eiq-neutron-sdk==3.1.0 ``` Instead of manually installing requirements, except MCUXpresso IDE and SDK, you can use the setup script: diff --git a/examples/nxp/aot_neutron_compile.py b/examples/nxp/aot_neutron_compile.py index 385e811fdaa..dda223c5650 100644 --- a/examples/nxp/aot_neutron_compile.py +++ b/examples/nxp/aot_neutron_compile.py @@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): 
action="store_true", help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.", ) + parser.add_argument( + "--use_new_flow_neutron_c", + required=False, + default=False, + action="store_true", + help="Enable experimental MLIR-based flow for Neutron-C with improves INT8 operator support.", + ) args = parser.parse_args() @@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool): operators_not_to_delegate=args.operators_not_to_delegate, fetch_constants_to_sram=args.fetch_constants_to_sram, dump_kernel_selection_code=args.dump_kernel_selection_code, + use_new_flow_neutron_c=args.use_new_flow_neutron_c, ) partitioners = ( [ diff --git a/examples/nxp/setup.sh b/examples/nxp/setup.sh index a538a55abc9..e6de05b9f47 100755 --- a/examples/nxp/setup.sh +++ b/examples/nxp/setup.sh @@ -8,7 +8,7 @@ set -u EIQ_PYPI_URL="${EIQ_PYPI_URL:-https://eiq.nxp.com/repository}" # Install eIQ Neutron dependencies - SDK and simulator -pip install --index-url ${EIQ_PYPI_URL} eiq-neutron-sdk==3.0.1 eiq_nsys +pip install --index-url ${EIQ_PYPI_URL} eiq-neutron-sdk==3.1.0 eiq_nsys # Get the directory of the current script SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"