diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-05-15 11:28:59 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-05-20 11:52:47 +0000 |
commit | 7a7fe65a6bdd09fd08678ba2ddd8d0da18565bc6 (patch) | |
tree | b4cfb16a427ae96ce8e0f0cbffd150f9406b74a7 /src/core/NEON/kernels | |
parent | ee4833d0a16cd65ad4fa9de087ec92080716e996 (diff) | |
download | ComputeLibrary-7a7fe65a6bdd09fd08678ba2ddd8d0da18565bc6.tar.gz |
COMPMID-3461: Remove boolean template from NEArithmeticAddition
Change-Id: I5b29483acb469efe09e693746f04057e3a8f995e
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3210
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r-- | src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp | 44 |
1 files changed, 16 insertions, 28 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index 3532526eb8..f8ee578ef8 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,33 +26,20 @@ #include "arm_compute/core/CPP/Validate.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/IAccessWindow.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" -#include <algorithm> -#include <arm_neon.h> -#include <cstdint> #include <map> #include <string> -using namespace arm_compute; - namespace arm_compute { -class Coordinates; -} // namespace arm_compute - namespace { -template <typename T, bool is_sat> -void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, ConvertPolicy policy, const Window &window) +template <typename T> +void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, const ConvertPolicy policy, const Window &window) { - ARM_COMPUTE_UNUSED(policy); - /** NEON vector tag type. */ using ExactTagType = typename wrapper::traits::neon_bitvector_tag_t<T, wrapper::traits::BitWidth::W128>; @@ -97,7 +84,7 @@ void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, ConvertPolic for(; x <= (window_end_x - window_step_x); x += window_step_x) { const auto non_broadcast_v = wrapper::vloadq(non_broadcast_input_ptr + x); - const auto res = is_sat ? wrapper::vqadd(broadcast_value_vec, non_broadcast_v) : wrapper::vadd(broadcast_value_vec, non_broadcast_v); + const auto res = (policy == ConvertPolicy::SATURATE) ? wrapper::vqadd(broadcast_value_vec, non_broadcast_v) : wrapper::vadd(broadcast_value_vec, non_broadcast_v); wrapper::vstore(output_ptr + x, res); } @@ -105,7 +92,7 @@ void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, ConvertPolic for(; x < window_end_x; ++x) { const auto non_broadcast_v = *(non_broadcast_input_ptr + x); - *(output_ptr + x) = is_sat ? wrapper::add_sat(broadcast_value, non_broadcast_v) : broadcast_value + non_broadcast_v; + *(output_ptr + x) = (policy == ConvertPolicy::SATURATE) ? wrapper::add_sat(broadcast_value, non_broadcast_v) : broadcast_value + non_broadcast_v; } }, broadcast_input, non_broadcast_input, output); @@ -132,7 +119,7 @@ void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, ConvertPolic { const auto val1 = wrapper::vloadq(input1_ptr + x); const auto val2 = wrapper::vloadq(input2_ptr + x); - const auto res = is_sat ? wrapper::vqadd(val1, val2) : wrapper::vadd(val1, val2); + const auto res = (policy == ConvertPolicy::SATURATE) ? wrapper::vqadd(val1, val2) : wrapper::vadd(val1, val2); wrapper::vstore(output_ptr + x, res); } @@ -141,7 +128,7 @@ void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, ConvertPolic { const auto val1 = *(input1_ptr + x); const auto val2 = *(input2_ptr + x); - *(output_ptr + x) = is_sat ? wrapper::add_sat(val1, val2) : val1 + val2; + *(output_ptr + x) = (policy == ConvertPolicy::SATURATE) ? wrapper::add_sat(val1, val2) : val1 + val2; } }, input1, input2, output); @@ -929,21 +916,21 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor { "add_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &add_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED }, { "add_wrap_QSYMM16_QSYMM16_QSYMM16", &add_QSYMM16_QSYMM16_QSYMM16 }, { "add_saturate_QSYMM16_QSYMM16_QSYMM16", &add_QSYMM16_QSYMM16_QSYMM16 }, - { "add_wrap_U8_U8_U8", &add_same<uint8_t, false> }, - { "add_saturate_U8_U8_U8", &add_same<uint8_t, true> }, + { "add_wrap_U8_U8_U8", &add_same<uint8_t> }, + { "add_saturate_U8_U8_U8", &add_same<uint8_t> }, { "add_wrap_S16_U8_S16", &add_S16_U8_S16 }, { "add_saturate_S16_U8_S16", &add_S16_U8_S16 }, { "add_wrap_U8_S16_S16", &add_U8_S16_S16 }, { "add_saturate_U8_S16_S16", &add_U8_S16_S16 }, { "add_wrap_U8_U8_S16", &add_U8_U8_S16 }, { "add_saturate_U8_U8_S16", &add_U8_U8_S16 }, - { "add_wrap_S16_S16_S16", &add_same<int16_t, false> }, - { "add_saturate_S16_S16_S16", &add_same<int16_t, true> }, - { "add_wrap_F32_F32_F32", &add_same<float, false> }, - { "add_saturate_F32_F32_F32", &add_same<float, false> }, + { "add_wrap_S16_S16_S16", &add_same<int16_t> }, + { "add_saturate_S16_S16_S16", &add_same<int16_t> }, + { "add_wrap_F32_F32_F32", &add_same<float> }, + { "add_saturate_F32_F32_F32", &add_same<float> }, #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - { "add_wrap_F16_F16_F16", &add_same<float16_t, false> }, - { "add_saturate_F16_F16_F16", &add_same<float16_t, false> }, + { "add_wrap_F16_F16_F16", &add_same<float16_t> }, + { "add_saturate_F16_F16_F16", &add_same<float16_t> }, #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ }; @@ -987,3 +974,4 @@ void NEArithmeticAdditionKernel::run(const Window &window, const ThreadInfo &inf (*_func)(_input1, _input2, _output, _policy, window); } +} // namespace arm_compute |