From 40b441905760846e9fdaca283a4a4de038a6ef0d Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Thu, 22 Sep 2022 10:24:23 +0100 Subject: Optimize CPU add layer on quantized data * Use fixed-point arithmetic where possible. * Various optimization for the FP32-based implementation. This implementation is kept as the fall-back solution in case of unrealistic quantization parameters that exceed the range of fixed-point solution. Resolves: COMPMID-5458 Signed-off-by: Viet-Hoa Do Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/cpu/kernels/add/generic/neon/impl.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/cpu/kernels/add/generic/neon/impl.h') diff --git a/src/cpu/kernels/add/generic/neon/impl.h b/src/cpu/kernels/add/generic/neon/impl.h index f8f0f517b0..e6a12fb4c0 100644 --- a/src/cpu/kernels/add/generic/neon/impl.h +++ b/src/cpu/kernels/add/generic/neon/impl.h @@ -35,6 +35,11 @@ void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const template void add_same_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); + +bool add_q8_neon_fixedpoint_possible(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); + +template +void add_q8_neon_fixedpoint(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); } // namespace cpu } // namespace arm_compute #endif // SRC_CORE_NEON_KERNELS_ADD_IMPL_H \ No newline at end of file -- cgit v1.2.1