diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-10-20 11:18:17 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-10-20 14:34:47 +0000 |
commit | 9fc0b5c484e0f6cfe52009719ebccc179ada1112 (patch) | |
tree | 60216895d51bd161f28f1bd1f49d5c2e9d1c6715 /src/cpu/kernels/add | |
parent | 6782452c16a286a6dd4a071cfc70bbbcbabb20be (diff) | |
download | ComputeLibrary-9fc0b5c484e0f6cfe52009719ebccc179ada1112.tar.gz |
Update reinterpret tensor as 1D for CPU add
* Use the same implementation as other layers.
Resolves: COMPMID-5108
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I5a50259b398b71ca1f61b5ee8daa539bf8263fac
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8501
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/add')
-rw-r--r-- | src/cpu/kernels/add/generic/neon/fp16.cpp | 5 | ||||
-rw-r--r-- | src/cpu/kernels/add/generic/neon/fp32.cpp | 5 | ||||
-rw-r--r-- | src/cpu/kernels/add/generic/neon/impl.cpp | 38 | ||||
-rw-r--r-- | src/cpu/kernels/add/generic/neon/impl.h | 3 | ||||
-rw-r--r-- | src/cpu/kernels/add/generic/neon/integer.cpp | 15 | ||||
-rw-r--r-- | src/cpu/kernels/add/list.h | 5 |
6 files changed, 0 insertions, 71 deletions
diff --git a/src/cpu/kernels/add/generic/neon/fp16.cpp b/src/cpu/kernels/add/generic/neon/fp16.cpp index bb6636af1e..1e3bc3c986 100644 --- a/src/cpu/kernels/add/generic/neon/fp16.cpp +++ b/src/cpu/kernels/add/generic/neon/fp16.cpp @@ -33,11 +33,6 @@ void add_fp16_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const { return add_same_neon<float16_t>(src0, src1, dst, policy, window); } - -void add_fp16_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - return add_same_neon_as_1d_array<float16_t>(src0, src1, dst, policy, window); -} } } // namespace arm_compute #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ diff --git a/src/cpu/kernels/add/generic/neon/fp32.cpp b/src/cpu/kernels/add/generic/neon/fp32.cpp index 1d313a191d..1f599b1968 100644 --- a/src/cpu/kernels/add/generic/neon/fp32.cpp +++ b/src/cpu/kernels/add/generic/neon/fp32.cpp @@ -32,10 +32,5 @@ void add_fp32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const { return add_same_neon<float>(src0, src1, dst, policy, window); } - -void add_fp32_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - return add_same_neon_as_1d_array<float>(src0, src1, dst, policy, window); -} } } // namespace arm_compute diff --git a/src/cpu/kernels/add/generic/neon/impl.cpp b/src/cpu/kernels/add/generic/neon/impl.cpp index 0f7b31c754..1a0b44fa8c 100644 --- a/src/cpu/kernels/add/generic/neon/impl.cpp +++ b/src/cpu/kernels/add/generic/neon/impl.cpp @@ -128,35 +128,6 @@ void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const } } -template <typename ScalarType> -void add_same_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - const ScalarType *src0_ptr = reinterpret_cast<const ScalarType *>(src0->buffer()); - const ScalarType *src1_ptr = reinterpret_cast<const ScalarType *>(src1->buffer()); - ScalarType *dst_ptr = reinterpret_cast<ScalarType *>(dst->buffer()); - - constexpr int window_step_x = 16 / sizeof(ScalarType); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - int x = window_start_x; - for(; x <= (window_end_x - window_step_x); x += window_step_x) - { - const auto val1 = wrapper::vloadq(src0_ptr + x); - const auto val2 = wrapper::vloadq(src1_ptr + x); - const auto res = (policy == ConvertPolicy::SATURATE) ? wrapper::vqadd(val1, val2) : wrapper::vadd(val1, val2); - wrapper::vstore(dst_ptr + x, res); - } - - // Compute left-over elements - for(; x < window_end_x; ++x) - { - const auto val1 = *(src0_ptr + x); - const auto val2 = *(src1_ptr + x); - *(dst_ptr + x) = (policy == ConvertPolicy::SATURATE) ? wrapper::add_sat(val1, val2) : val1 + val2; - } -} - bool add_q8_neon_fixedpoint_possible(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst) { const auto iq0 = src0->quantization_info().uniform(); @@ -383,15 +354,6 @@ template void add_same_neon<int16_t>(const ITensor *src0, const ITensor *src1, I template void add_same_neon<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); #endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ -template void add_same_neon_as_1d_array<float>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -template void add_same_neon_as_1d_array<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -template void add_same_neon_as_1d_array<int32_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -template void add_same_neon_as_1d_array<int16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); - -#if defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) -template void add_same_neon_as_1d_array<float16_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -#endif /* (__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */ - template void add_q8_neon_fixedpoint<int8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); template void add_q8_neon_fixedpoint<uint8_t>(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); diff --git a/src/cpu/kernels/add/generic/neon/impl.h b/src/cpu/kernels/add/generic/neon/impl.h index e6a12fb4c0..91f347ff9c 100644 --- a/src/cpu/kernels/add/generic/neon/impl.h +++ b/src/cpu/kernels/add/generic/neon/impl.h @@ -33,9 +33,6 @@ namespace cpu template <typename ScalarType> void add_same_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); -template <typename ScalarType> -void add_same_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window); - bool add_q8_neon_fixedpoint_possible(const ITensorInfo *src0, const ITensorInfo *src1, const ITensorInfo *dst); template <typename ScalarType> diff --git a/src/cpu/kernels/add/generic/neon/integer.cpp b/src/cpu/kernels/add/generic/neon/integer.cpp index ffead03474..5698d6d552 100644 --- a/src/cpu/kernels/add/generic/neon/integer.cpp +++ b/src/cpu/kernels/add/generic/neon/integer.cpp @@ -42,20 +42,5 @@ void add_s32_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, const { return add_same_neon<int32_t>(src0, src1, dst, policy, window); } - -void add_u8_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - return add_same_neon_as_1d_array<uint8_t>(src0, src1, dst, policy, window); -} - -void add_s16_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - return add_same_neon_as_1d_array<int16_t>(src0, src1, dst, policy, window); -} - -void add_s32_neon_as_1d_array(const ITensor *src0, const ITensor *src1, ITensor *dst, const ConvertPolicy &policy, const Window &window) -{ - return add_same_neon_as_1d_array<int32_t>(src0, src1, dst, policy, window); -} } } // namespace arm_compute diff --git a/src/cpu/kernels/add/list.h b/src/cpu/kernels/add/list.h index 0285b231e0..7cdb70fd9e 100644 --- a/src/cpu/kernels/add/list.h +++ b/src/cpu/kernels/add/list.h @@ -38,15 +38,10 @@ DECLARE_ADD_KERNEL(add_qasymm8_neon); DECLARE_ADD_KERNEL(add_qasymm8_signed_neon); DECLARE_ADD_KERNEL(add_qsymm16_neon); DECLARE_ADD_KERNEL(add_fp32_neon); -DECLARE_ADD_KERNEL(add_fp32_neon_as_1d_array); DECLARE_ADD_KERNEL(add_fp16_neon); -DECLARE_ADD_KERNEL(add_fp16_neon_as_1d_array); DECLARE_ADD_KERNEL(add_u8_neon); -DECLARE_ADD_KERNEL(add_u8_neon_as_1d_array); DECLARE_ADD_KERNEL(add_s16_neon); -DECLARE_ADD_KERNEL(add_s16_neon_as_1d_array); DECLARE_ADD_KERNEL(add_s32_neon); -DECLARE_ADD_KERNEL(add_s32_neon_as_1d_array); DECLARE_ADD_KERNEL(add_fp32_sve); DECLARE_ADD_KERNEL(add_fp16_sve); DECLARE_ADD_KERNEL(add_u8_sve); |