Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 44 additions & 67 deletions backends/arm/quantizer/arm_quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,7 @@
)
from executorch.backends.cortex_m.quantizer.pattern_matcher import PatternMatcher

from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
QuantizerReporter,
SUPPORTED_QCONFIGS,
SUPPORTED_QSPECS,
)
from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter

from torch._ops import OpOverload

Expand Down Expand Up @@ -219,20 +215,28 @@ def get_symmetric_quantization_config(
bias_quantization_spec = _get_int32_bias_qspec

if is_dynamic:
quantization_config = TOSAQuantizationConfig(
act_quantization_spec,
None,
weight_quantization_spec,
bias_quantization_spec,
)
output_activation = None
else:
quantization_config = TOSAQuantizationConfig(
act_quantization_spec,
act_quantization_spec,
weight_quantization_spec,
bias_quantization_spec,
)
return quantization_config
output_activation = act_quantization_spec

module_name = __name__.rsplit(".", maxsplit=1)[-1]
label = (
f"{module_name}.get_symmetric_quantization_config("
f"per_channel={int(is_per_channel)}, "
f"qat={int(is_qat)}, "
f"dynamic={int(is_dynamic)}, "
f"act_range=[{act_qmin}, {act_qmax}], "
f"weight_range=[{weight_qmin}, {weight_qmax}]"
")"
)

return TOSAQuantizationConfig(
act_quantization_spec,
output_activation,
weight_quantization_spec,
bias_quantization_spec,
label,
)


@functools.lru_cache
Expand Down Expand Up @@ -357,59 +361,32 @@ def get_symmetric_a16w8_quantization_config(
is_qat=is_qat,
is_dynamic=is_dynamic,
)
# Replace activation quantization spec with 16-bit version

if is_dynamic:
quantization_config = TOSAQuantizationConfig(
act_quantization_spec, # 16-bit input activations
None,
base_config.weight, # 8-bit weights from base config
base_config.bias, # bias from base config
)
output_activation = None
else:
quantization_config = TOSAQuantizationConfig(
act_quantization_spec, # 16-bit input activations
act_quantization_spec, # 16-bit output activations
base_config.weight, # 8-bit weights from base config
base_config.bias, # bias from base config
)
return quantization_config

output_activation = act_quantization_spec

module_name = __name__.rsplit(".", maxsplit=1)[-1]
label = (
f"{module_name}.get_symmetric_a16w8_quantization_config("
f"per_channel={int(is_per_channel)}, "
f"qat={int(is_qat)}, "
f"dynamic={int(is_dynamic)}, "
f"act_range=[{act_quantization_spec.quant_min}, {act_quantization_spec.quant_max}], "
f"weight_range=[{weight_qmin}, {weight_qmax}]"
")"
)

# Register supported quantization configs and qspecs in the reporter for human-readable reporting
# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
_symmetric_a8w4_config_per_channel = get_symmetric_a8w4_quantization_config()
_symmetric_a8w8_config_per_channel = get_symmetric_quantization_config()
_symmetric_a16w8_config_per_channel = get_symmetric_a16w8_quantization_config()
_symmetric_a8w4_config_per_tensor = get_symmetric_a8w4_quantization_config(
is_per_channel=False
)
_symmetric_a8w8_config_per_tensor = get_symmetric_quantization_config(
is_per_channel=False
)
_symmetric_a16w8_config_per_tensor = get_symmetric_a16w8_quantization_config(
is_per_channel=False
)
SUPPORTED_QCONFIGS.update(
{
_symmetric_a8w8_config_per_channel: f"{__name__}.get_symmetric_quantization_config(is_per_channel=True)",
_symmetric_a16w8_config_per_channel: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=True)",
_symmetric_a8w4_config_per_channel: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=True)",
_symmetric_a8w8_config_per_tensor: f"{__name__}.get_symmetric_quantization_config(is_per_channel=False)",
_symmetric_a16w8_config_per_tensor: f"{__name__}.get_symmetric_a16w8_quantization_config(is_per_channel=False)",
_symmetric_a8w4_config_per_tensor: f"{__name__}.get_symmetric_a8w4_quantization_config(is_per_channel=False)",
}
)
# Replace activation quantization spec with 16-bit version
return TOSAQuantizationConfig(
act_quantization_spec, # 16-bit input activations
output_activation,
base_config.weight, # 8-bit weights from base config
base_config.bias, # bias from base config
label,
)

SUPPORTED_QSPECS.update(
{
_symmetric_a8w4_config_per_channel.get_weight_qspec(): "INT4_PER_CHANNEL_QSPEC",
_symmetric_a8w8_config_per_channel.get_weight_qspec(): "INT8_PER_CHANNEL_QSPEC",
_symmetric_a8w8_config_per_tensor.get_weight_qspec(): "INT8_PER_TENSOR_QSPEC",
_symmetric_a8w4_config_per_tensor.get_weight_qspec(): "INT4_PER_TENSOR_QSPEC",
_symmetric_a8w8_config_per_tensor.get_input_act_qspec(): "INT8_PER_TENSOR_QSPEC",
_symmetric_a16w8_config_per_tensor.get_input_act_qspec(): "INT16_PER_TENSOR_QSPEC",
}
)

NodeFilterType = Callable[[Node], bool]
"""Type for a Node Filter used by annotators.
Expand Down
57 changes: 19 additions & 38 deletions backends/arm/quantizer/arm_quantizer_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@
from executorch.backends.arm.common.annotation_meta import ArmAnnotationInfo
from executorch.backends.arm.constants import DISALLOW_TFA_META_KEY
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
from executorch.backends.cortex_m.quantizer_reporter import (
QuantizerInfo,
QuantizerReporterUser,
)
from torch.fx import Node

from torchao.quantization.pt2e.quantizer import (
Expand Down Expand Up @@ -160,25 +164,6 @@ def _get_int32_per_channel_bias_qspec(node):
)


class _QuantizerReporterUserMixin:
def __init__(self):
self.reporter = None

def register_reporter(self, reporter) -> None:
self.reporter = reporter

def report_reject(self, pattern: list[Node], reason: str) -> None:
if self.reporter is not None:
self.reporter.report_reject(self, pattern, reason)

def report_accept(self, pattern: list[Node]) -> None:
if self.reporter is not None:
self.reporter.report_accept(self, pattern)

def get_quantizer_info(self):
raise NotImplementedError("Quantizer must implement get_quantizer_info method.")


class PatternCheck:
"""Base class for pattern checks.

Expand Down Expand Up @@ -248,7 +233,7 @@ def find_nodes(self, model: torch.fx.GraphModule) -> Iterator[Node]:
pass


class PatternQuantizer(Quantizer, _QuantizerReporterUserMixin):
class PatternQuantizer(Quantizer, QuantizerReporterUser):
"""Quantizes a graph according to an OperatorConfig.

Args:
Expand All @@ -265,28 +250,28 @@ def __init__(
pattern_matcher: "PatternMatcher",
) -> None:
super().__init__()
_QuantizerReporterUserMixin.__init__(self)
QuantizerReporterUser.__init__(self)
self.quantization_config: QuantizationConfig | None = quantization_config
self.node_finder: "NodeFinder" = node_finder
self.pattern_matcher: "PatternMatcher" = pattern_matcher

def get_quantizer_info(self):
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
QuantizerInfo,
SUPPORTED_QCONFIGS,
)

name = self.__class__.__name__
targeted_nodes_description = str(self.node_finder)
quantization_config_path = SUPPORTED_QCONFIGS.get(
self.quantization_config, "UNREGISTERED_QCONFIG"
)
if self.quantization_config is None:
qconfig_label = "NO_QCONFIG"
else:
qconfig_label = (
self.quantization_config.label
if self.quantization_config.label is not None
else self.quantization_config.__class__.__name__ # no label, fallback to class name
)
support_config_path = self.pattern_matcher.support_dict_name

return QuantizerInfo(
name,
targeted_nodes_description,
quantization_config_path,
qconfig_label,
support_config_path,
)

Expand Down Expand Up @@ -397,7 +382,7 @@ def validate(self, model: torch.fx.GraphModule) -> bool: # type: ignore[overrid
return True


class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):
class SharedQspecQuantizer(Quantizer, QuantizerReporterUser):
"""Ensures that specific ops share quantization parameters on all
inputs/outputs.
"""
Expand Down Expand Up @@ -495,7 +480,7 @@ class SharedQspecQuantizer(Quantizer, _QuantizerReporterUserMixin):

def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> None:
super().__init__()
_QuantizerReporterUserMixin.__init__(self)
QuantizerReporterUser.__init__(self)
if targets is None:
self.targets = self.SHARED_QSPEC_OPS_DEFAULT
self.support_config_path = (
Expand All @@ -508,18 +493,14 @@ def __init__(self, targets: Optional[list[Callable[..., object]]] = None) -> Non
)

def get_quantizer_info(self):
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
QuantizerInfo,
)

name = self.__class__.__name__
targeted_nodes_description = ""
quantization_config_path = "SHARED_QCONFIG"
qconfig_label = "shared qparams for connected targeted nodes"
support_config_path = self.support_config_path
return QuantizerInfo(
name,
targeted_nodes_description,
quantization_config_path,
qconfig_label,
support_config_path,
)

Expand Down
1 change: 1 addition & 0 deletions backends/arm/quantizer/quantization_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class QuantizationConfig:
output_activation: Optional[QuantizationSpecBase]
weight: Optional[QuantizationSpecBase]
bias: Optional[QuantizationSpecBase] | Callable[[Any], Any]
label: Optional[str] = None # Optional label for debugging/visualization purposes

def get_input_act_qspec(
self, node: Optional[Node] = None, input_node: Optional[Node] = None
Expand Down
21 changes: 21 additions & 0 deletions backends/cortex_m/TARGETS
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Copyright 2026 Arm Limited and/or its affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

load("@fbcode_macros//build_defs:python_library.bzl", "python_library")

oncall("executorch")

# Build target for the quantizer reporter module. Other targets depend on it
# through the label "//executorch/backends/cortex_m:quantizer_reporter".
python_library(
name = "quantizer_reporter",
# The library consists of a single source file located next to this TARGETS file.
srcs = [
"quantizer_reporter.py",
],
# Libraries made available to quantizer_reporter.py: torch, torchao, and the
# third-party PyPI package tabulate.
deps = [
"//caffe2:torch",
"//pytorch/ao:torchao",
"fbsource//third-party/pypi/tabulate:tabulate",
],
)
16 changes: 2 additions & 14 deletions backends/cortex_m/quantizer/TARGETS
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ python_library(
"pattern_matcher.py",
"quantization_configs.py",
"quantizer.py",
"quantizer_reporter.py",
"quantizer_support.py",
],
deps = [
Expand All @@ -27,6 +26,7 @@ python_library(
"//executorch/backends/arm/quantizer:arm_quantizer_utils",
"//executorch/backends/arm/quantizer:quantization_annotator",
"//executorch/backends/arm/quantizer:quantization_config",
"//executorch/backends/cortex_m:quantizer_reporter",
"//pytorch/ao:torchao",
"fbsource//third-party/pypi/tabulate:tabulate",
],
Expand All @@ -42,19 +42,7 @@ python_library(
"//caffe2:torch",
"//executorch/backends/arm/quantizer:arm_quantizer_utils",
"//executorch/backends/arm/quantizer:quantization_config",
"//executorch/backends/cortex_m:quantizer_reporter",
"//pytorch/ao:torchao",
":quantizer_reporter",
],
)

python_library(
name = "quantizer_reporter",
srcs = [
"quantizer_reporter.py",
],
deps = [
"//caffe2:torch",
"//pytorch/ao:torchao",
"fbsource//third-party/pypi/tabulate:tabulate",
],
)
27 changes: 2 additions & 25 deletions backends/cortex_m/quantizer/quantization_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,6 @@
_get_int32_per_channel_bias_qspec,
)
from executorch.backends.arm.quantizer.quantization_config import QuantizationConfig
from executorch.backends.cortex_m.quantizer.quantizer_reporter import (
SUPPORTED_QCONFIGS,
SUPPORTED_QSPECS,
)
from torch.fx import Node
from torchao.quantization.pt2e import (
HistogramObserver,
Expand Down Expand Up @@ -156,6 +152,7 @@ def get_bias_qspec(
INT8_ACTIVATION_PER_TENSOR_QSPEC,
INT8_WEIGHT_PER_TENSOR_QSPEC,
_get_int32_bias_qspec,
f"{__name__}.INT8_PER_TENSOR_CONFIG",
)


Expand All @@ -164,25 +161,5 @@ def get_bias_qspec(
INT8_ACTIVATION_PER_TENSOR_QSPEC,
INT8_WEIGHT_PER_CHANNEL_QSPEC,
_get_int32_per_channel_bias_qspec,
)


# Register supported quantization configs and qspecs in the reporter for human-readable reporting
# MLETORCH-1854: Temporary solution, refactor to automatically register these instead
SUPPORTED_QCONFIGS.update(
{
INT8_PER_CHANNEL_CONFIG: f"{__name__}.INT8_PER_CHANNEL_QCONFIG",
INT8_PER_TENSOR_CONFIG: f"{__name__}.INT8_PER_TENSOR_QCONFIG",
}
)

SUPPORTED_QSPECS.update(
{
INT8_ACTIVATION_PER_TENSOR_QSPEC: "INT8_ACTIVATION_PER_TENSOR_QSPEC",
INT8_ACTIVATION_PER_CHANNEL_QSPEC: "INT8_ACTIVATION_PER_CHANNEL_QSPEC",
INT8_WEIGHT_PER_TENSOR_QSPEC: "INT8_WEIGHT_PER_TENSOR_QSPEC",
INT8_WEIGHT_PER_CHANNEL_QSPEC: "INT8_WEIGHT_PER_CHANNEL_QSPEC",
INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC: "INT8_WEIGHT_PER_CHANNEL_TRANSPOSE_QSPEC",
SOFTMAX_OUTPUT_FIXED_QSPEC: "SOFTMAX_OUTPUT_FIXED_QSPEC",
}
f"{__name__}.INT8_PER_CHANNEL_CONFIG",
)
2 changes: 1 addition & 1 deletion backends/cortex_m/quantizer/quantizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,13 @@
INT8_PER_CHANNEL_CONFIG,
INT8_PER_TENSOR_CONFIG,
)
from executorch.backends.cortex_m.quantizer.quantizer_reporter import QuantizerReporter
from executorch.backends.cortex_m.quantizer.quantizer_support import (
__name__ as cortex_m_quantizer_support_module,
CONV_OP_PATTERNS,
CONV_TRANSPOSE_OP_PATTERNS,
CORTEX_M_QUANTIZER_SUPPORT_DICT,
)
from executorch.backends.cortex_m.quantizer_reporter import QuantizerReporter
from torch._ops import OpOverload
from torch.fx import GraphModule
from torchao.quantization.pt2e.quantizer import ComposableQuantizer, Quantizer
Expand Down
Loading
Loading