Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions backends/nxp/backend/neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def convert(
target: str,
delegation_tag: str,
fetch_constants_to_sram: bool = False,
use_new_flow_neutron_c: bool = False,
) -> bytes:
"""
Call Neutron Converter.
Expand All @@ -75,6 +76,7 @@ def convert(
:param target: The target platform.
:param delegation_tag: The delegation tag of model partition.
:param fetch_constants_to_sram: Add microcode that fetches weights from external memory.
This allows running models which do not fit into SRAM. Applies to Neutron-C only (microcontrollers).
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C which improves INT8 operator support.

:return: TFLite model with Neutron microcode as bytes.
Expand All @@ -90,6 +92,7 @@ def convert(
)
cctx.compilationOpts.fetchConstantsToSRAM = fetch_constants_to_sram
cctx.compilationOpts.dumpKernelSelectionCode = self.dump_kernel_selection_code
cctx.compilationOpts.useNewFlowNeutronC = use_new_flow_neutron_c

# Try to use multiprocessing for isolation, but fall back to direct execution
# if the environment doesn't support it (e.g., in sandcastle/build environments)
Expand Down
40 changes: 28 additions & 12 deletions backends/nxp/nxp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def __init__(self):
self.use_neutron_for_format_conversion = True
self.fetch_constants_to_sram = False
self.dump_kernel_selection_code = False
self.use_new_flow_neutron_c = False

def _replace_colons(self, operator: str) -> str:
"""
Expand All @@ -65,20 +66,21 @@ def neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
):
"""
Generate compile spec for Neutron NPU

Args:
config: Neutron accelerator configuration, e.g. "imxrt700"
extra_flags: Extra flags for the Neutron compiler
operators_not_to_delegate: List of operators that should not be delegated
use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
use_new_flow_neutron_c: bool = False,
) -> "NeutronCompileSpecBuilder":
"""Generate compile spec for Neutron NPU

:param config: Neutron accelerator configuration, e.g. "imxrt700"
:param extra_flags: Extra flags for the Neutron compiler
:param operators_not_to_delegate: List of operators that should not be delegated
:param use_neutron_for_format_conversion: If True, the EdgeProgramToIRConverter will insert `Transpose` ops to
ensure that the IO matches the executorch partition, which will be
delegated to Neutron.
fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
:param fetch_constants_to_sram: If True, the Neutron Converter will insert microinstructions to prefetch weights
from FLASH to SRAM. This should be used when the whole model does not fit into SRAM.
dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param dump_kernel_selection_code: Whether Neutron converter dumps kernel selection code.
:param use_new_flow_neutron_c: Enable experimental MLIR-based flow for Neutron-C which improves INT8 operator support.
:return: self for method chaining
"""

self.config = NeutronTargetSpec(config)
Expand All @@ -100,6 +102,7 @@ def neutron_compile_spec(
self.use_neutron_for_format_conversion = use_neutron_for_format_conversion
self.fetch_constants_to_sram = fetch_constants_to_sram
self.dump_kernel_selection_code = dump_kernel_selection_code
self.use_new_flow_neutron_c = use_new_flow_neutron_c

return self

Expand Down Expand Up @@ -128,6 +131,10 @@ def build(self):
"dump_kernel_selection_code",
f"{self.dump_kernel_selection_code}".encode(),
),
CompileSpec(
"use_new_flow_neutron_c",
f"{self.use_new_flow_neutron_c}".encode(),
),
]

return self.compile_spec
Expand All @@ -141,6 +148,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> List[CompileSpec]:
return (
NeutronCompileSpecBuilder()
Expand All @@ -151,6 +159,7 @@ def generate_neutron_compile_spec(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
.build()
)
Expand All @@ -175,6 +184,7 @@ def preprocess( # noqa C901
use_neutron_for_format_conversion = None
fetch_constants_to_sram = False
dump_kernel_selection_code = None
use_new_flow_neutron_c = False
for spec in compile_spec:
if spec.key == "output_format":
output_format = spec.value.decode()
Expand All @@ -188,6 +198,8 @@ def preprocess( # noqa C901
fetch_constants_to_sram = spec.value.decode() == "True"
if spec.key == "dump_kernel_selection_code":
dump_kernel_selection_code = spec.value.decode() == "True"
if spec.key == "use_new_flow_neutron_c":
use_new_flow_neutron_c = spec.value.decode() == "True"

# Check that the output format is set in the compile spec
if not output_format:
Expand Down Expand Up @@ -220,7 +232,11 @@ def preprocess( # noqa C901
)

neutron_model = NeutronConverterManager(dump_kernel_selection_code).convert(
tflite_model, target, delegation_tag, fetch_constants_to_sram
tflite_model,
target,
delegation_tag,
fetch_constants_to_sram,
use_new_flow_neutron_c,
)

# Dump the tflite file if logging level is enabled
Expand Down
2 changes: 1 addition & 1 deletion backends/nxp/requirements-eiq.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
--index-url https://eiq.nxp.com/repository
eiq-neutron-sdk==3.0.1
eiq-neutron-sdk==3.1.0
eiq_nsys
2 changes: 2 additions & 0 deletions backends/nxp/tests/executorch_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def to_quantized_edge_program(
use_quant_state_dict: bool = True,
fetch_constants_to_sram: bool = False,
dump_kernel_selection_code: bool = False,
use_new_flow_neutron_c: bool = False,
) -> EdgeProgramManager:
_neutron_target_spec = NeutronTargetSpec(target)
if get_quantizer_fn is None:
Expand Down Expand Up @@ -160,6 +161,7 @@ def to_quantized_edge_program(
use_neutron_for_format_conversion=use_neutron_for_format_conversion,
fetch_constants_to_sram=fetch_constants_to_sram,
dump_kernel_selection_code=dump_kernel_selection_code,
use_new_flow_neutron_c=use_new_flow_neutron_c,
)
post_quant_state_dict = (
exir_program_aten__module_quant.state_dict() if use_quant_state_dict else None
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def test_constant_pad_nd__delegation__formatless__supported_padding(use_qat):
paddings = [0, 0, 1, 2, 3, 4] # The last dim is padded using the first 2 paddings.
model = ConstantPadNDModule(paddings)
exec_program = to_quantized_edge_program(
model, input_shape, use_qat=use_qat
model, input_shape, use_qat=use_qat, use_new_flow_neutron_c=True
).exported_program()

# Make sure the `pad` was delegated.
Expand All @@ -191,13 +191,12 @@ def test_constant_pad_nd__delegation__formatless__unsupported_padding(use_qat):
)


@pytest.mark.xfail(reason="Regression in Neutron SW 3.0.1 (AIR-14264)", strict=True)
def test_constant_pad_nd__delegation__channels_first__supported_padding(use_qat):
input_shape = (2, 4, 6, 8) # Channels first -> the second dim (4) will be padded.
paddings = [1, 2, 3, 4, 0, 0] # The second dim is padded using the paddings[4:6].
model = ConstantPadNDConvModule(paddings)
exec_program = to_quantized_edge_program(
model, input_shape, use_qat=use_qat
model, input_shape, use_qat=use_qat, use_new_flow_neutron_c=True
).exported_program()

# Make sure the `pad` was delegated.
Expand Down
17 changes: 17 additions & 0 deletions backends/nxp/tests/test_neutron_converter_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import multiprocessing

import torch
from eiq_neutron_sdk.neutron_converter.neutron_converter import CompilationContext

from executorch import exir
from executorch.backends.nxp.backend.edge_program_converter import (
Expand Down Expand Up @@ -56,3 +59,17 @@ def test_conv2d_neutron_conversion__prefetching(mocker):
assert len(neutron_model_prefetch) != len(
neutron_model_regular
), "The weight prefetching flag does not make a difference!"


def test_neutron_converter_with_experimental_mlir_flow(mocker):
    """Verify that `use_new_flow_neutron_c=True` is propagated to the compilation options.

    The test spies on `multiprocessing.Process`, runs the quantized edge program
    pipeline with the flag enabled, and then inspects the `args` keyword of the last
    `Process(...)` call: its third element must be a `CompilationContext` whose
    `compilationOpts.useNewFlowNeutronC` is truthy.
    """
    linear_model = LinearModule(True)
    shape = (1, 1, 32, 32)

    spawn_spy = mocker.spy(multiprocessing, "Process")

    edge_program_manager = to_quantized_edge_program(
        linear_model, shape, use_new_flow_neutron_c=True
    )
    edge_program_manager.exported_program()

    # Index 2 of the worker arguments is where this test expects the compilation context.
    worker_args = spawn_spy.call_args.kwargs["args"]
    cctx = worker_args[2]

    assert isinstance(cctx, CompilationContext)
    assert cctx.compilationOpts.useNewFlowNeutronC
4 changes: 2 additions & 2 deletions docs/source/backends/nxp/nxp-overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@ Among currently supported machine learning models are:

- [MCUXpresso IDE](https://www.nxp.com/design/design-center/software/development-software/mcuxpresso-software-and-tools-/mcuxpresso-integrated-development-environment-ide:MCUXpresso-IDE) or [MCUXpresso Visual Studio Code extension](https://www.nxp.com/design/design-center/software/development-software/mcuxpresso-software-and-tools-/mcuxpresso-for-visual-studio-code:MCUXPRESSO-VSC)
- [MCUXpresso SDK 25.12](https://mcuxpresso.nxp.com/mcuxsdk/25.12.00/html/index.html)
- eIQ Neutron SDK version 3.0.0, what you can download from eIQ PyPI:
- eIQ Neutron SDK version 3.1.0, which you can download from eIQ PyPI:

```commandline
$ pip install --index-url https://eiq.nxp.com/repository eiq_neutron_sdk==3.0.0
$ pip install --index-url https://eiq.nxp.com/repository eiq-neutron-sdk==3.1.0
```

Instead of manually installing requirements, except MCUXpresso IDE and SDK, you can use the setup script:
Expand Down
8 changes: 8 additions & 0 deletions examples/nxp/aot_neutron_compile.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,13 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
action="store_true",
help="This feature allows running models which do not fit into SRAM by offloading them to an external memory.",
)
parser.add_argument(
"--use_new_flow_neutron_c",
required=False,
default=False,
action="store_true",
help="Enable experimental MLIR-based flow for Neutron-C with improves INT8 operator support.",
)

args = parser.parse_args()

Expand Down Expand Up @@ -323,6 +330,7 @@ def get_model_and_inputs_from_name(model_name: str, use_random_dataset: bool):
operators_not_to_delegate=args.operators_not_to_delegate,
fetch_constants_to_sram=args.fetch_constants_to_sram,
dump_kernel_selection_code=args.dump_kernel_selection_code,
use_new_flow_neutron_c=args.use_new_flow_neutron_c,
)
partitioners = (
[
Expand Down
2 changes: 1 addition & 1 deletion examples/nxp/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ set -u
EIQ_PYPI_URL="${EIQ_PYPI_URL:-https://eiq.nxp.com/repository}"

# Install eIQ Neutron dependencies - SDK and simulator
pip install --index-url ${EIQ_PYPI_URL} eiq-neutron-sdk==3.0.1 eiq_nsys
pip install --index-url ${EIQ_PYPI_URL} eiq-neutron-sdk==3.1.0 eiq_nsys

# Get the directory of the current script
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
Expand Down
Loading