From 56e5f0c22ebc995dae13c6b72b08b28934a7871a Mon Sep 17 00:00:00 2001 From: William Isaksson Date: Wed, 10 Jan 2024 12:28:04 +0100 Subject: CONV ops int16 tests failed after TensorFlow update Adds support for setting the accumulator type using the quantized_bias_type attribute Change-Id: Ibde1149143b510a1c650a5a037d3ab92d878d7cd Signed-off-by: William Isaksson --- ethosu/vela/api.py | 15 +++++++++++++-- ethosu/vela/high_level_command_to_npu_op.py | 4 +++- ethosu/vela/register_command_stream_generator.py | 13 +++++++++++-- ethosu/vela/register_command_stream_util.py | 13 ++++++++++++- ethosu/vela/tflite_supported_operators.py | 11 +---------- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py index 589a283..7125e88 100644 --- a/ethosu/vela/api.py +++ b/ethosu/vela/api.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -27,7 +27,7 @@ import numpy API_VERSION_MAJOR = 1 -API_VERSION_MINOR = 4 +API_VERSION_MINOR = 5 API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}" @@ -273,6 +273,16 @@ class NpuKernel: self.dilation_y = dilation_y +class NpuAccumulatorType(Enum): + """ + Accumulator dtype of NPU operation + """ + + Default = auto() + Int32 = auto() + Int40 = auto() + + class NpuOperationType(Enum): """ Type of NPU operation @@ -343,6 +353,7 @@ class NpuBlockOperation(NpuOperation): self.fused_quantize: bool = False # IFM upscaling to be applied self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE + self.accumulator_type: NpuAccumulatorType = NpuAccumulatorType.Default class NpuConv2DOperation(NpuBlockOperation): diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py index 53df096..06d91a6 100644 --- a/ethosu/vela/high_level_command_to_npu_op.py +++ b/ethosu/vela/high_level_command_to_npu_op.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -65,6 +65,7 @@ from .operation import Padding from .operation import RoundingMode from .register_command_stream_generator import generate_command_stream from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM +from .register_command_stream_util import to_npu_acc_type from .register_command_stream_util import to_npu_kernel from .register_command_stream_util import UNARY_ELEMWISE_OPS from .shape4d import Shape4D @@ -545,6 +546,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit npu_op.padding = create_padding(cmd, op, npu_op) npu_op.kernel = to_npu_kernel(op.kernel) npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode] + npu_op.accumulator_type = to_npu_acc_type(op.attrs.get("quantized_bias_type", None)) return npu_op diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py index 9d9a1e6..ec01d3e 100644 --- a/ethosu/vela/register_command_stream_generator.py +++ b/ethosu/vela/register_command_stream_generator.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -31,6 +31,7 @@ import numpy as np from . import scaling from .api import NpuAccelerator +from .api import NpuAccumulatorType from .api import NpuActivation from .api import NpuActivationOp from .api import NpuAddressRange @@ -270,6 +271,11 @@ acc_format_map = { SHRAMElements.Acc40: acc_format.INT_40BIT.value, } +npu_acc_format_map = { + NpuAccumulatorType.Int32: acc_format.INT_32BIT.value, + NpuAccumulatorType.Int40: acc_format.INT_40BIT.value, +} + resampling_mode_map = { NpuResamplingMode.NONE: resampling_mode.NONE, NpuResamplingMode.NEAREST: resampling_mode.NEAREST, @@ -574,7 +580,10 @@ def generate_shram_registers( emit.cmd0_with_param(cmd0.NPU_SET_AB_START, arch_block_config.layout.ab_start) if has_ifm2(npu_op): emit.cmd0_with_param(cmd0.NPU_SET_IFM2_IB_START, arch_block_config.layout.ib_start2) - emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, acc_format_map[arch_block_config.acc_type]) + if npu_op.accumulator_type != NpuAccumulatorType.Default: + emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, npu_acc_format_map[npu_op.accumulator_type]) + else: + emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, acc_format_map[arch_block_config.acc_type]) def get_block_config_for_npu_op( diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py index 74c4f90..8a6f94e 100644 --- a/ethosu/vela/register_command_stream_util.py +++ b/ethosu/vela/register_command_stream_util.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -21,6 +21,7 @@ from typing import NamedTuple from typing import Optional from . import numeric_util +from .api import NpuAccumulatorType from .api import NpuActivationOp from .api import NpuAddressRange from .api import NpuBlockOperation @@ -42,6 +43,7 @@ from .errors import ByteSizeError from .operation import Kernel from .operation import PointXYZ from .tensor import TensorFormat +from .tflite.TensorType import TensorType from ethosu.vela.range_set import AccessDirection from ethosu.vela.range_set import MemoryAccessSet from ethosu.vela.range_set import MemoryRangeSet @@ -74,6 +76,15 @@ def check_length(length, required_multiple): check_size(length, required_multiple, "length") +def to_npu_acc_type(accType: TensorType) -> NpuAccumulatorType: + if accType == TensorType.INT32: + return NpuAccumulatorType.Int32 + elif accType == TensorType.INT64: + return NpuAccumulatorType.Int40 + else: + return NpuAccumulatorType.Default + + def to_npu_kernel(kernel: Kernel) -> NpuKernel: """Converts the given internally used kernel object to NpuKernel (of public API)""" return NpuKernel( diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py index ada2136..48813fe 100644 --- a/ethosu/vela/tflite_supported_operators.py +++ b/ethosu/vela/tflite_supported_operators.py @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates +# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates # # SPDX-License-Identifier: Apache-2.0 # @@ -58,7 +58,6 @@ class TFLiteSupportedOperators: depthwise_convolution_ops = set((Op.DepthwiseConv2DBias,)) transpose_convolution_ops = set((Op.Conv2DBackpropInput,)) convolution_like_ops = convolution_ops | depthwise_convolution_ops | transpose_convolution_ops - conv_depth_fc_op = convolution_ops | depthwise_convolution_ops | set((Op.FullyConnected,)) max_pooling_ops = Op.op_set(Op.is_maxpool_op) avg_pooling_ops = Op.op_set(Op.is_avgpool_op) pooling_ops = set((Op.ReduceSum,)) | max_pooling_ops | avg_pooling_ops @@ -239,8 +238,6 @@ class TFLiteSupportedOperators: self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_shape) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_type) self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_40bit) - for op_type in TFLiteSupportedOperators.conv_depth_fc_op: - self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_no_quantized_bias_type) # Transpose Conv specific checks: for op_type in TFLiteSupportedOperators.transpose_convolution_ops: self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_stride) @@ -534,12 +531,6 @@ class TFLiteSupportedOperators: return valid, f"Tensor '{bias.name}' has values larger than 40-bits" return True, "Op has no bias tensor, or it fits in 40-bit" - def constraint_no_quantized_bias_type(op): - "Attribute quantized_bias_type must not be set" - quantized_bias_type = op.attrs.get("quantized_bias_type", False) - valid = quantized_bias_type == 0 - return valid, f"Op has quantized_bias_type={quantized_bias_type}" - @staticmethod def constraint_batch_size(op): "IFM Tensor batch size must be 1" -- cgit v1.2.1