From 40b441905760846e9fdaca283a4a4de038a6ef0d Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Thu, 22 Sep 2022 10:24:23 +0100 Subject: Optimize CPU add layer on quantized data * Use fixed-point arithmetic where possible. * Various optimizations for the FP32-based implementation. This implementation is kept as the fall-back solution in case of unrealistic quantization parameters that exceed the range of the fixed-point solution. Resolves: COMPMID-5458 Signed-off-by: Viet-Hoa Do Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/cpu/kernels/CpuAddKernel.cpp | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'src/cpu/kernels/CpuAddKernel.cpp') diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp index 8fc82ec184..47ff6abf17 100644 --- a/src/cpu/kernels/CpuAddKernel.cpp +++ b/src/cpu/kernels/CpuAddKernel.cpp @@ -48,6 +48,22 @@ namespace { static const std::vector available_kernels = { + { + "neon_qu8_add_fixedpoint", + [](const CpuAddKernelDataTypeISASelectorData & data) + { + return (data.dt == DataType::QASYMM8) && data.can_use_fixedpoint; + }, + REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint) + }, + { + "neon_qs8_add_fixedpoint", + [](const CpuAddKernelDataTypeISASelectorData & data) + { + return (data.dt == DataType::QASYMM8_SIGNED) && data.can_use_fixedpoint; + }, + REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint) + }, { "neon_fp32_add_as_1d_array", [](const CpuAddKernelDataTypeISASelectorData & data) @@ -222,8 +238,9 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons "Wrong shape for dst"); } + const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(&src0, &src1, &dst); const auto uk = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{ 
src0.data_type(), - CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1) }); + CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1), can_use_fixedpoint }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -259,8 +276,9 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy)); _can_interpret_inputs_as_1d_array = can_interpret_inputs_as_1d_array(*src0, *src1); + const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(src0, src1, dst); const auto uk = CpuAddKernel::get_implementation(CpuAddKernelDataTypeISASelectorData{ src0->data_type(), - CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array }); + CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array, can_use_fixedpoint }); ARM_COMPUTE_ERROR_ON_NULLPTR(uk); -- cgit v1.2.1