From 40b441905760846e9fdaca283a4a4de038a6ef0d Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Thu, 22 Sep 2022 10:24:23 +0100 Subject: Optimize CPU add layer on quantized data * Use fixed-point arithmetic where possible. * Various optimization for the FP32-based implementation. This implementation is kept as the fall-back solution in case of unrealistic quantization parameters that exceed the range of fixed-point solution. Resolves: COMPMID-5458 Signed-off-by: Viet-Hoa Do Change-Id: I221d2d3801ecaae4fe0b7cf6ae8ef00ca3743665 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8317 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/NEON/wrapper/intrinsics/intrinsics.h | 1 + src/core/NEON/wrapper/intrinsics/shr.h | 78 +++++++++++++++++++++++++++ 2 files changed, 79 insertions(+) create mode 100644 src/core/NEON/wrapper/intrinsics/shr.h (limited to 'src/core') diff --git a/src/core/NEON/wrapper/intrinsics/intrinsics.h b/src/core/NEON/wrapper/intrinsics/intrinsics.h index 0256e0a8c8..97975ebe7c 100644 --- a/src/core/NEON/wrapper/intrinsics/intrinsics.h +++ b/src/core/NEON/wrapper/intrinsics/intrinsics.h @@ -67,6 +67,7 @@ #include "src/core/NEON/wrapper/intrinsics/rev64.h" #include "src/core/NEON/wrapper/intrinsics/round.h" #include "src/core/NEON/wrapper/intrinsics/setlane.h" +#include "src/core/NEON/wrapper/intrinsics/shr.h" #include "src/core/NEON/wrapper/intrinsics/sin.h" #include "src/core/NEON/wrapper/intrinsics/sqrt.h" #include "src/core/NEON/wrapper/intrinsics/store.h" diff --git a/src/core/NEON/wrapper/intrinsics/shr.h b/src/core/NEON/wrapper/intrinsics/shr.h new file mode 100644 index 0000000000..69fc254b61 --- /dev/null +++ b/src/core/NEON/wrapper/intrinsics/shr.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2022 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_WRAPPER_SHR_H +#define ARM_COMPUTE_WRAPPER_SHR_H + +#include <arm_neon.h> +#include <type_traits> + +namespace arm_compute +{ +namespace wrapper +{ + +#define VQRSHRN_IMPL(half_vtype, vtype, prefix, postfix) \ + template <int b> \ + inline half_vtype vqrshrn(const vtype &a) \ + { \ + return prefix##_##postfix(a, b); \ + } + +VQRSHRN_IMPL(int8x8_t, int16x8_t, vqrshrn_n, s16) +VQRSHRN_IMPL(uint8x8_t, uint16x8_t, vqrshrn_n, u16) +VQRSHRN_IMPL(int16x4_t, int32x4_t, vqrshrn_n, s32) +VQRSHRN_IMPL(uint16x4_t, uint32x4_t, vqrshrn_n, u32) +VQRSHRN_IMPL(int32x2_t, int64x2_t, vqrshrn_n, s64) +VQRSHRN_IMPL(uint32x2_t, uint64x2_t, vqrshrn_n, u64) + +#undef VQRSHRN_IMPL + +// This function is the mixed version of VQRSHRN and VQRSHRUN. 
+// The input vector is always signed integer, while the returned vector +// can be either signed or unsigned depending on the signedness of scalar type T. +#define VQRSHRN_EX_IMPL(half_vtype, vtype, prefix_signed, prefix_unsigned, postfix) \ + template <int b, typename T> \ + inline typename std::enable_if<std::is_integral<T>::value && std::is_signed<T>::value, half_vtype>::type \ + vqrshrn_ex(const vtype &a) \ + { \ + return prefix_signed##_##postfix(a, b); \ + } \ + \ + template <int b, typename T> \ + inline typename std::enable_if<std::is_integral<T>::value && !std::is_signed<T>::value, u##half_vtype>::type \ + vqrshrn_ex(const vtype &a) \ + { \ + return prefix_unsigned##_##postfix(a, b); \ + } + +VQRSHRN_EX_IMPL(int8x8_t, int16x8_t, vqrshrn_n, vqrshrun_n, s16) +VQRSHRN_EX_IMPL(int16x4_t, int32x4_t, vqrshrn_n, vqrshrun_n, s32) +VQRSHRN_EX_IMPL(int32x2_t, int64x2_t, vqrshrn_n, vqrshrun_n, s64) + +#undef VQRSHRN_EX_IMPL + +} // namespace wrapper +} // namespace arm_compute +#endif /* ARM_COMPUTE_WRAPPER_SHR_H */ -- cgit v1.2.1