diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-09-22 10:24:23 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-10-03 08:57:23 +0000 |
commit | 40b441905760846e9fdaca283a4a4de038a6ef0d (patch) | |
tree | 38a4f6b5122bfaf44a2a33e90b331a2e1a30b113 /src/cpu/kernels/CpuAddKernel.cpp | |
parent | ff81de5a9a0f6b9331c3b112cc2aed552f0482a9 (diff) | |
download | ComputeLibrary-40b441905760846e9fdaca283a4a4de038a6ef0d.tar.gz |
Optimize CPU add layer on quantized data
* Use fixed-point arithmetic where possible.
* Various optimizations for the FP32-based implementation.
This implementation is kept as the fall-back solution
in case of unrealistic quantization parameters that exceed
the range of the fixed-point solution.
Resolves: COMPMID-5458
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/CpuAddKernel.cpp')
-rw-r--r-- | src/cpu/kernels/CpuAddKernel.cpp | 22 |
1 files changed, 20 insertions, 2 deletions
diff --git a/src/cpu/kernels/CpuAddKernel.cpp b/src/cpu/kernels/CpuAddKernel.cpp index 8fc82ec184..47ff6abf17 100644 --- a/src/cpu/kernels/CpuAddKernel.cpp +++ b/src/cpu/kernels/CpuAddKernel.cpp @@ -49,6 +49,22 @@ namespace static const std::vector<CpuAddKernel::AddKernel> available_kernels = { { + "neon_qu8_add_fixedpoint", + [](const CpuAddKernelDataTypeISASelectorData & data) + { + return (data.dt == DataType::QASYMM8) && data.can_use_fixedpoint; + }, + REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint<uint8_t>) + }, + { + "neon_qs8_add_fixedpoint", + [](const CpuAddKernelDataTypeISASelectorData & data) + { + return (data.dt == DataType::QASYMM8_SIGNED) && data.can_use_fixedpoint; + }, + REGISTER_FP32_NEON(arm_compute::cpu::add_q8_neon_fixedpoint<int8_t>) + }, + { "neon_fp32_add_as_1d_array", [](const CpuAddKernelDataTypeISASelectorData & data) { @@ -222,8 +238,9 @@ Status validate_arguments(const ITensorInfo &src0, const ITensorInfo &src1, cons "Wrong shape for dst"); } + const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(&src0, &src1, &dst); const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(CpuAddKernelDataTypeISASelectorData{ src0.data_type(), - CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1) }); + CPUInfo::get().get_isa(), can_interpret_inputs_as_1d_array(src0, src1), can_use_fixedpoint }); ARM_COMPUTE_RETURN_ERROR_ON(uk == nullptr || uk->ukernel == nullptr); return Status{}; @@ -259,8 +276,9 @@ void CpuAddKernel::configure(const ITensorInfo *src0, const ITensorInfo *src1, I ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*src0, *src1, *dst, policy)); _can_interpret_inputs_as_1d_array = can_interpret_inputs_as_1d_array(*src0, *src1); + const auto can_use_fixedpoint = add_q8_neon_fixedpoint_possible(src0, src1, dst); const auto uk = CpuAddKernel::get_implementation<CpuAddKernelDataTypeISASelectorData>(CpuAddKernelDataTypeISASelectorData{ src0->data_type(), - 
CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array }); + CPUInfo::get().get_isa(), _can_interpret_inputs_as_1d_array, can_use_fixedpoint }); ARM_COMPUTE_ERROR_ON_NULLPTR(uk); |