aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/CpuAddKernel.cpp
diff options
context:
space:
mode:
author: Viet-Hoa Do <viet-hoa.do@arm.com>, 2022-09-22 10:24:23 +0100
committer: Viet-Hoa Do <viet-hoa.do@arm.com>, 2022-10-03 08:57:23 +0000
commit: 40b441905760846e9fdaca283a4a4de038a6ef0d (patch)
tree: 38a4f6b5122bfaf44a2a33e90b331a2e1a30b113 /src/cpu/kernels/CpuAddKernel.cpp
parent: ff81de5a9a0f6b9331c3b112cc2aed552f0482a9 (diff)
download: ComputeLibrary-40b441905760846e9fdaca283a4a4de038a6ef0d.tar.gz
Optimize CPU add layer on quantized data
* Use fixed-point arithmetic where possible. * Various optimizations for the FP32-based implementation. This implementation is kept as the fallback solution in case of unrealistic quantization parameters that exceed the range of the fixed-point solution. Resolves: COMPMID-5458 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuAddKernel.cpp')
-rw-r--r-- src/cpu/kernels/CpuAddKernel.cpp | 22
1 files changed, 20 insertions, 2 deletions
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp
index 8fc82ec184..47ff6abf17 100644
--- a/src/cpu/kernels/CpuAddKernel.cpp
+++ b/src/cpu/kernels/CpuAddKernel.cpp
@@ -49,6 +49,22 @@ namespace
static const std::vector<CpuAddKernel::AddKernel> available_kernels =
{
{
+ "neon_qu8_add_fixedpoint",
+ [](const CpuAddKernelDataTypeISASelectorData & data)
+ {
+ return (data.dt == DataType::QASYMM8) && data.can_use_fixedpoint;
+ },
+ REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint<uint8_t>)
+ },
+ {
+ "neon_qs8_add_fixedpoint",
+ [](const CpuAddKernelDataTypeISASelectorData & data)
+ {
+ return (data.dt == DataType::QASYMM8_SIGNED) && data.can_use_fixedpoint;
+ },
+ REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint<int8_t>)
+ },
+ {
"neon_fp32_add_as_1d_array",
[](const CpuAddKernelDataTypeISASelectorData & data)
{
@@ -222,8 +238,9 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons
"Wrong shape for dst");
}
+ const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(&src0, &src1, &dst);
const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(CpuAddKernelDataTypeISASelectorData{ src0.data_type(),
- CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1) });
+ CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1), can_use_fixedpoint });
ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr);
return Status{};
@@ -259,8 +276,9 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy));
_can_interpret_inputs_as_1d_array = can_interpret_inputs_as_1d_array(*src0, *src1);
+ const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(src0, src1, dst);
const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(CpuAddKernelDataTypeISASelectorData{ src0->data_type(),
- CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array });
+ CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array, can_use_fixedpoint });
ARM_COMPUTE_ERROR_ON_NULLPTR(uk);