diff options
Diffstat (limited to 'src/core/cpu/kernels/softmax/impl')
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/neon/list.h (renamed from src/core/cpu/kernels/softmax/impl/NEON/list.h) | 0 | ||||
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/sve/impl.cpp | 185 | ||||
-rw-r--r-- | src/core/cpu/kernels/softmax/impl/sve/list.h (renamed from src/core/cpu/kernels/softmax/impl/SVE/list.h) | 142 |
3 files changed, 191 insertions, 136 deletions
diff --git a/src/core/cpu/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/neon/list.h index 5ebee31272..5ebee31272 100644 --- a/src/core/cpu/kernels/softmax/impl/NEON/list.h +++ b/src/core/cpu/kernels/softmax/impl/neon/list.h diff --git a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp new file mode 100644 index 0000000000..4ed5a4fbea --- /dev/null +++ b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp @@ -0,0 +1,185 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#if defined(ENABLE_SVE) +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "arm_compute/core/utils/misc/Traits.h" +#include "src/core/NEON/SVEMath.h" +#include "src/core/NEON/wrapper/intrinsics/intrinsics.h" +#include <arm_sve.h> + +namespace arm_compute +{ +namespace cpu +{ +template <typename ScalarType> +void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window) +{ + const auto all_true_pg = wrapper::svptrue<ScalarType>(); + const auto window_start_x = static_cast<int>(window.x().start()); + const auto window_end_x = static_cast<int>(window.x().end()); + + Window win{ window }; + win.set(Window::DimX, Window::Dimension(0, 1, 1)); + Iterator input(in, win); + Iterator output(out, win); + + execute_window_loop(win, [&](const Coordinates &) + { + // Get pointers + const auto in_ptr = reinterpret_cast<const ScalarType *>(input.ptr()); + const auto out_ptr = reinterpret_cast<ScalarType *>(output.ptr()); + + // Init max value + auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>()); + + int x = window_start_x; + svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); + do + { + const auto current_value = svld1(pg, in_ptr + x); + vec_max = svmax_m(pg, vec_max, current_value); + + x += wrapper::svcnt<ScalarType>(); + pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); + } + while(svptest_any(all_true_pg, pg)); + + auto max_val = svmaxv(all_true_pg, vec_max); + + *out_ptr = max_val; + }, + input, output); +} + +template <typename ScalarType> +void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, + ITensor *out, const float beta, bool is_log, const Window &window) +{ + const int start_x = in->info()->valid_region().anchor.x(); + const int input_width = in->info()->valid_region().shape.x(); + + Iterator in_it(in, window); + Iterator max_it(max, window); + Iterator out_it(out, window); + + const auto all_true_pg = wrapper::svptrue<ScalarType>(); + + execute_window_loop(window, [&](const Coordinates &) + { + /* Get pointers */ + const auto in_ptr = reinterpret_cast<const ScalarType *>(in_it.ptr()) + start_x; + const auto out_ptr = reinterpret_cast<ScalarType *>(out_it.ptr()) + start_x; + const auto tmp_ptr = reinterpret_cast<ScalarType *>(tmp); + + ScalarType sum{ 0 }; + + /* Compute exponentials and sum */ + { + /* Get max value */ + const auto max_val = *reinterpret_cast<const ScalarType *>(max_it.ptr()); + const auto vec_max = wrapper::svdup_n(max_val); + + /* Init sum to zero */ + auto vec_sum = wrapper::svdup_n(static_cast<ScalarType>(0)); + + /* Loop over row and compute exponentials and sum */ + int x = 0; + svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width); + do + { + auto vec_elements = svld1(pg, in_ptr + x); + vec_elements = svsub_z(pg, vec_elements, vec_max); + if(is_log) + { + vec_elements = svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta))); + vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements)); + } + else + { + vec_elements = wrapper::svexp_z(pg, svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta)))); + vec_sum = svadd_m(pg, vec_sum, vec_elements); + } + svst1(pg, tmp_ptr + x, vec_elements); + + x += wrapper::svcnt<ScalarType>(); + pg = wrapper::svwhilelt<ScalarType>(x, input_width); + } + while(svptest_any(all_true_pg, pg)); + + /* Reduce sum */ + sum = svaddv(all_true_pg, vec_sum); + + if(is_log) + { + sum = static_cast<ScalarType>(std::log(sum)); + } + else + { + sum = ScalarType(1) / sum; + } + } + + /* Normalize exponentials */ + { + /* Loop over row and compute softmax */ + int x = 0; + svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width); + do + { + auto vec_in = svld1(pg, tmp_ptr + x); + auto normalized_value = wrapper::svdup_n(static_cast<ScalarType>(0)); + if(is_log) + { + normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum))); + } + else + { + normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum))); + } + svst1(pg, out_ptr + x, normalized_value); + + x += wrapper::svcnt<ScalarType>(); + pg = wrapper::svwhilelt<ScalarType>(x, input_width); + } + while(svptest_any(all_true_pg, pg)); + } + }, + in_it, max_it, out_it); +} + +template void sve_logits_1d_max<float>(const ITensor *in, ITensor *out, const Window &window); +template void sve_logits_1d_max<float16_t>(const ITensor *in, ITensor *out, const Window &window); +template void sve_logits_1d_max<qasymm8_t>(const ITensor *in, ITensor *out, const Window &window); +template void sve_logits_1d_max<qasymm8_signed_t>(const ITensor *in, ITensor *out, const Window &window); + +template void sve_softmax_logits_1d_float<float>(const ITensor *in, const ITensor *max, void *const tmp, + ITensor *out, const float beta, bool is_log, const Window &window); +template void sve_softmax_logits_1d_float<float16_t>(const ITensor *in, const ITensor *max, void *const tmp, + ITensor *out, const float beta, bool is_log, const Window &window); +} // namespace cpu +} // namespace arm_compute +#endif /* defined(ENABLE_SVE) */ diff --git a/src/core/cpu/kernels/softmax/impl/SVE/list.h b/src/core/cpu/kernels/softmax/impl/sve/list.h index d558d7d193..7ddb358b8e 100644 --- a/src/core/cpu/kernels/softmax/impl/SVE/list.h +++ b/src/core/cpu/kernels/softmax/impl/sve/list.h @@ -24,7 +24,7 @@ #ifndef SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H #define SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H -#if defined(__ARM_FEATURE_SVE) +#if defined(ENABLE_SVE) #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/Traits.h" #include "src/core/NEON/SVEMath.h" @@ -36,44 +36,11 @@ namespace arm_compute namespace cpu { template <typename ScalarType> -void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window) -{ - const auto all_true_pg = wrapper::svptrue<ScalarType>(); - const auto window_start_x = static_cast<int>(window.x().start()); - const auto window_end_x = static_cast<int>(window.x().end()); - - Window win{ window }; - win.set(Window::DimX, Window::Dimension(0, 1, 1)); - Iterator input(in, win); - Iterator output(out, win); - - execute_window_loop(win, [&](const Coordinates &) - { - // Get pointers - const auto in_ptr = reinterpret_cast<const ScalarType *>(input.ptr()); - const auto out_ptr = reinterpret_cast<ScalarType *>(output.ptr()); - - // Init max value - auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>()); +void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window); - int x = window_start_x; - svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); - do - { - const auto current_value = svld1(pg, in_ptr + x); - vec_max = svmax_m(pg, vec_max, current_value); - - x += wrapper::svcnt<ScalarType>(); - pg = wrapper::svwhilelt<ScalarType>(x, window_end_x); - } - while(svptest_any(all_true_pg, pg)); - - auto max_val = svmaxv(all_true_pg, vec_max); - - *out_ptr = max_val; - }, - input, output); -} +template <typename ScalarType> +void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, + ITensor *out, const float beta, bool is_log, const Window &window); #if defined(__ARM_FEATURE_SVE2) template <typename ScalarType> @@ -249,105 +216,8 @@ void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void in_it, max_it, out_it); } #endif /* defined(__ARM_FEATURE_SVE2) */ - -template <typename ScalarType> -void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp, - ITensor *out, const float beta, bool is_log, const Window &window) -{ - const int start_x = in->info()->valid_region().anchor.x(); - const int input_width = in->info()->valid_region().shape.x(); - - Iterator in_it(in, window); - Iterator max_it(max, window); - Iterator out_it(out, window); - - const auto all_true_pg = wrapper::svptrue<ScalarType>(); - - execute_window_loop(window, [&](const Coordinates &) - { - /* Get pointers */ - const auto in_ptr = reinterpret_cast<const ScalarType *>(in_it.ptr()) + start_x; - const auto out_ptr = reinterpret_cast<ScalarType *>(out_it.ptr()) + start_x; - const auto tmp_ptr = reinterpret_cast<ScalarType *>(tmp); - - ScalarType sum{ 0 }; - - /* Compute exponentials and sum */ - { - /* Get max value */ - const auto max_val = *reinterpret_cast<const ScalarType *>(max_it.ptr()); - const auto vec_max = wrapper::svdup_n(max_val); - - /* Init sum to zero */ - auto vec_sum = wrapper::svdup_n(static_cast<ScalarType>(0)); - - /* Loop over row and compute exponentials and sum */ - int x = 0; - svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width); - do - { - auto vec_elements = svld1(pg, in_ptr + x); - vec_elements = svsub_z(pg, vec_elements, vec_max); - if(is_log) - { - vec_elements = svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta))); - vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements)); - } - else - { - vec_elements = wrapper::svexp_z(pg, svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta)))); - vec_sum = svadd_m(pg, vec_sum, vec_elements); - } - svst1(pg, tmp_ptr + x, vec_elements); - - x += wrapper::svcnt<ScalarType>(); - pg = wrapper::svwhilelt<ScalarType>(x, input_width); - } - while(svptest_any(all_true_pg, pg)); - - /* Reduce sum */ - sum = svaddv(all_true_pg, vec_sum); - - if(is_log) - { - sum = static_cast<ScalarType>(std::log(sum)); - } - else - { - sum = ScalarType(1) / sum; - } - } - - /* Normalize exponentials */ - { - /* Loop over row and compute softmax */ - int x = 0; - svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width); - do - { - auto vec_in = svld1(pg, tmp_ptr + x); - auto normalized_value = wrapper::svdup_n(static_cast<ScalarType>(0)); - if(is_log) - { - normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum))); - } - else - { - normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum))); - } - svst1(pg, out_ptr + x, normalized_value); - - x += wrapper::svcnt<ScalarType>(); - pg = wrapper::svwhilelt<ScalarType>(x, input_width); - } - while(svptest_any(all_true_pg, pg)); - } - }, - in_it, max_it, out_it); -} - } // namespace cpu } // namespace arm_compute -#endif /* defined(__ARM_FEATURE_SVE) */ +#endif /* defined(ENABLE_SVE) */ #endif /* SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H */ |