about summary refs log tree commit diff
diff options
context:
space:
mode:
author    William Isaksson <william.isaksson@arm.com>  2024-01-10 12:28:04 +0100
committer Rickard Bolin <rickard.bolin@arm.com>        2024-01-18 13:24:26 +0000
commit    56e5f0c22ebc995dae13c6b72b08b28934a7871a (patch)
tree      c0e7a25770d6d3dc8f15782a0e4529aff081ef3c
parent    84fe2f60d5c6a25fa73d081cc90ee858ebca821d (diff)
download  ethos-u-vela-56e5f0c22ebc995dae13c6b72b08b28934a7871a.tar.gz
CONV ops int16 tests failed after TensorFlow update
Adds support for setting the accumulator type using the quantized_bias_type attribute.

Change-Id: Ibde1149143b510a1c650a5a037d3ab92d878d7cd
Signed-off-by: William Isaksson <william.isaksson@arm.com>
-rw-r--r--  ethosu/vela/api.py                              15
-rw-r--r--  ethosu/vela/high_level_command_to_npu_op.py      4
-rw-r--r--  ethosu/vela/register_command_stream_generator.py 13
-rw-r--r--  ethosu/vela/register_command_stream_util.py      13
-rw-r--r--  ethosu/vela/tflite_supported_operators.py        11
5 files changed, 40 insertions, 16 deletions
diff --git a/ethosu/vela/api.py b/ethosu/vela/api.py
index 589a283..7125e88 100644
--- a/ethosu/vela/api.py
+++ b/ethosu/vela/api.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -27,7 +27,7 @@ import numpy
API_VERSION_MAJOR = 1
-API_VERSION_MINOR = 4
+API_VERSION_MINOR = 5
API_VERSION = f"{API_VERSION_MAJOR}.{API_VERSION_MINOR}"
@@ -273,6 +273,16 @@ class NpuKernel:
self.dilation_y = dilation_y
+class NpuAccumulatorType(Enum):
+ """
+ Accumulator dtype of NPU operation
+ """
+
+ Default = auto()
+ Int32 = auto()
+ Int40 = auto()
+
+
class NpuOperationType(Enum):
"""
Type of NPU operation
@@ -343,6 +353,7 @@ class NpuBlockOperation(NpuOperation):
self.fused_quantize: bool = False
# IFM upscaling to be applied
self.ifm_upscale: NpuResamplingMode = NpuResamplingMode.NONE
+ self.accumulator_type: NpuAccumulatorType = NpuAccumulatorType.Default
class NpuConv2DOperation(NpuBlockOperation):
diff --git a/ethosu/vela/high_level_command_to_npu_op.py b/ethosu/vela/high_level_command_to_npu_op.py
index 53df096..06d91a6 100644
--- a/ethosu/vela/high_level_command_to_npu_op.py
+++ b/ethosu/vela/high_level_command_to_npu_op.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -65,6 +65,7 @@ from .operation import Padding
from .operation import RoundingMode
from .register_command_stream_generator import generate_command_stream
from .register_command_stream_util import BASE_PTR_INDEX_MEM2MEM
+from .register_command_stream_util import to_npu_acc_type
from .register_command_stream_util import to_npu_kernel
from .register_command_stream_util import UNARY_ELEMWISE_OPS
from .shape4d import Shape4D
@@ -545,6 +546,7 @@ def set_common_op_fields(npu_op: NpuBlockOperation, cmd: NpuStripe, arch: Archit
npu_op.padding = create_padding(cmd, op, npu_op)
npu_op.kernel = to_npu_kernel(op.kernel)
npu_op.ifm_upscale = resampling_mode_inv_map[op.ifm_resampling_mode]
+ npu_op.accumulator_type = to_npu_acc_type(op.attrs.get("quantized_bias_type", None))
return npu_op
diff --git a/ethosu/vela/register_command_stream_generator.py b/ethosu/vela/register_command_stream_generator.py
index 9d9a1e6..ec01d3e 100644
--- a/ethosu/vela/register_command_stream_generator.py
+++ b/ethosu/vela/register_command_stream_generator.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -31,6 +31,7 @@ import numpy as np
from . import scaling
from .api import NpuAccelerator
+from .api import NpuAccumulatorType
from .api import NpuActivation
from .api import NpuActivationOp
from .api import NpuAddressRange
@@ -270,6 +271,11 @@ acc_format_map = {
SHRAMElements.Acc40: acc_format.INT_40BIT.value,
}
+npu_acc_format_map = {
+ NpuAccumulatorType.Int32: acc_format.INT_32BIT.value,
+ NpuAccumulatorType.Int40: acc_format.INT_40BIT.value,
+}
+
resampling_mode_map = {
NpuResamplingMode.NONE: resampling_mode.NONE,
NpuResamplingMode.NEAREST: resampling_mode.NEAREST,
@@ -574,7 +580,10 @@ def generate_shram_registers(
emit.cmd0_with_param(cmd0.NPU_SET_AB_START, arch_block_config.layout.ab_start)
if has_ifm2(npu_op):
emit.cmd0_with_param(cmd0.NPU_SET_IFM2_IB_START, arch_block_config.layout.ib_start2)
- emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, acc_format_map[arch_block_config.acc_type])
+ if npu_op.accumulator_type != NpuAccumulatorType.Default:
+ emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, npu_acc_format_map[npu_op.accumulator_type])
+ else:
+ emit.cmd0_with_param(cmd0.NPU_SET_ACC_FORMAT, acc_format_map[arch_block_config.acc_type])
def get_block_config_for_npu_op(
diff --git a/ethosu/vela/register_command_stream_util.py b/ethosu/vela/register_command_stream_util.py
index 74c4f90..8a6f94e 100644
--- a/ethosu/vela/register_command_stream_util.py
+++ b/ethosu/vela/register_command_stream_util.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -21,6 +21,7 @@ from typing import NamedTuple
from typing import Optional
from . import numeric_util
+from .api import NpuAccumulatorType
from .api import NpuActivationOp
from .api import NpuAddressRange
from .api import NpuBlockOperation
@@ -42,6 +43,7 @@ from .errors import ByteSizeError
from .operation import Kernel
from .operation import PointXYZ
from .tensor import TensorFormat
+from .tflite.TensorType import TensorType
from ethosu.vela.range_set import AccessDirection
from ethosu.vela.range_set import MemoryAccessSet
from ethosu.vela.range_set import MemoryRangeSet
@@ -74,6 +76,15 @@ def check_length(length, required_multiple):
check_size(length, required_multiple, "length")
+def to_npu_acc_type(accType: TensorType) -> NpuAccumulatorType:
+ if accType == TensorType.INT32:
+ return NpuAccumulatorType.Int32
+ elif accType == TensorType.INT64:
+ return NpuAccumulatorType.Int40
+ else:
+ return NpuAccumulatorType.Default
+
+
def to_npu_kernel(kernel: Kernel) -> NpuKernel:
"""Converts the given internally used kernel object to NpuKernel (of public API)"""
return NpuKernel(
diff --git a/ethosu/vela/tflite_supported_operators.py b/ethosu/vela/tflite_supported_operators.py
index ada2136..48813fe 100644
--- a/ethosu/vela/tflite_supported_operators.py
+++ b/ethosu/vela/tflite_supported_operators.py
@@ -1,4 +1,4 @@
-# SPDX-FileCopyrightText: Copyright 2020-2023 Arm Limited and/or its affiliates <open-source-office@arm.com>
+# SPDX-FileCopyrightText: Copyright 2020-2024 Arm Limited and/or its affiliates <open-source-office@arm.com>
#
# SPDX-License-Identifier: Apache-2.0
#
@@ -58,7 +58,6 @@ class TFLiteSupportedOperators:
depthwise_convolution_ops = set((Op.DepthwiseConv2DBias,))
transpose_convolution_ops = set((Op.Conv2DBackpropInput,))
convolution_like_ops = convolution_ops | depthwise_convolution_ops | transpose_convolution_ops
- conv_depth_fc_op = convolution_ops | depthwise_convolution_ops | set((Op.FullyConnected,))
max_pooling_ops = Op.op_set(Op.is_maxpool_op)
avg_pooling_ops = Op.op_set(Op.is_avgpool_op)
pooling_ops = set((Op.ReduceSum,)) | max_pooling_ops | avg_pooling_ops
@@ -239,8 +238,6 @@ class TFLiteSupportedOperators:
self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_shape)
self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_type)
self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_bias_40bit)
- for op_type in TFLiteSupportedOperators.conv_depth_fc_op:
- self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_no_quantized_bias_type)
# Transpose Conv specific checks:
for op_type in TFLiteSupportedOperators.transpose_convolution_ops:
self.specific_constraints[op_type].append(TFLiteSupportedOperators.constraint_tconv_stride)
@@ -534,12 +531,6 @@ class TFLiteSupportedOperators:
return valid, f"Tensor '{bias.name}' has values larger than 40-bits"
return True, "Op has no bias tensor, or it fits in 40-bit"
- def constraint_no_quantized_bias_type(op):
- "Attribute quantized_bias_type must not be set"
- quantized_bias_type = op.attrs.get("quantized_bias_type", False)
- valid = quantized_bias_type == 0
- return valid, f"Op has quantized_bias_type={quantized_bias_type}"
-
@staticmethod
def constraint_batch_size(op):
"IFM Tensor batch size must be 1"