aboutsummaryrefslogtreecommitdiff
path: root/src/core/cpu/kernels/softmax
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-22 16:42:03 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2021-06-07 13:21:17 +0000
commitbdcdc39d89b6a6556f5c0483af5379f75eae0c55 (patch)
tree454cd50afa81da3ca3382701619fef023911e3f7 /src/core/cpu/kernels/softmax
parent5a643320b79f15a5d09b5366c4744579cf71e303 (diff)
downloadComputeLibrary-bdcdc39d89b6a6556f5c0483af5379f75eae0c55.tar.gz
Enable fat binary support
Changes our build system to allow building both Neon(TM) and SVE kernels and package them in the same binary. This will allow runtime selection of the underlying architecture. Adds new build option, fat_binary, for enabling this feature. Change-Id: I8e8386149773ce28e071a2fb7ddd8c8ae0f28a4a Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5704 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/cpu/kernels/softmax')
-rw-r--r--src/core/cpu/kernels/softmax/impl/neon/list.h (renamed from src/core/cpu/kernels/softmax/impl/NEON/list.h)0
-rw-r--r--src/core/cpu/kernels/softmax/impl/sve/impl.cpp185
-rw-r--r--src/core/cpu/kernels/softmax/impl/sve/list.h (renamed from src/core/cpu/kernels/softmax/impl/SVE/list.h)142
3 files changed, 191 insertions, 136 deletions
diff --git a/src/core/cpu/kernels/softmax/impl/NEON/list.h b/src/core/cpu/kernels/softmax/impl/neon/list.h
index 5ebee31272..5ebee31272 100644
--- a/src/core/cpu/kernels/softmax/impl/NEON/list.h
+++ b/src/core/cpu/kernels/softmax/impl/neon/list.h
diff --git a/src/core/cpu/kernels/softmax/impl/sve/impl.cpp b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp
new file mode 100644
index 0000000000..4ed5a4fbea
--- /dev/null
+++ b/src/core/cpu/kernels/softmax/impl/sve/impl.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#if defined(ENABLE_SVE)
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "arm_compute/core/utils/misc/Traits.h"
+#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+template <typename ScalarType>
+void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
+{
+ const auto all_true_pg = wrapper::svptrue<ScalarType>();
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+
+ Window win{ window };
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
+ Iterator input(in, win);
+ Iterator output(out, win);
+
+ execute_window_loop(win, [&](const Coordinates &)
+ {
+ // Get pointers
+ const auto in_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
+ const auto out_ptr = reinterpret_cast<ScalarType *>(output.ptr());
+
+ // Init max value
+ auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>());
+
+ int x = window_start_x;
+ svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
+ do
+ {
+ const auto current_value = svld1(pg, in_ptr + x);
+ vec_max = svmax_m(pg, vec_max, current_value);
+
+ x += wrapper::svcnt<ScalarType>();
+ pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
+ }
+ while(svptest_any(all_true_pg, pg));
+
+ auto max_val = svmaxv(all_true_pg, vec_max);
+
+ *out_ptr = max_val;
+ },
+ input, output);
+}
+
+template <typename ScalarType>
+void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp,
+ ITensor *out, const float beta, bool is_log, const Window &window)
+{
+ const int start_x = in->info()->valid_region().anchor.x();
+ const int input_width = in->info()->valid_region().shape.x();
+
+ Iterator in_it(in, window);
+ Iterator max_it(max, window);
+ Iterator out_it(out, window);
+
+ const auto all_true_pg = wrapper::svptrue<ScalarType>();
+
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ /* Get pointers */
+ const auto in_ptr = reinterpret_cast<const ScalarType *>(in_it.ptr()) + start_x;
+ const auto out_ptr = reinterpret_cast<ScalarType *>(out_it.ptr()) + start_x;
+ const auto tmp_ptr = reinterpret_cast<ScalarType *>(tmp);
+
+ ScalarType sum{ 0 };
+
+ /* Compute exponentials and sum */
+ {
+ /* Get max value */
+ const auto max_val = *reinterpret_cast<const ScalarType *>(max_it.ptr());
+ const auto vec_max = wrapper::svdup_n(max_val);
+
+ /* Init sum to zero */
+ auto vec_sum = wrapper::svdup_n(static_cast<ScalarType>(0));
+
+ /* Loop over row and compute exponentials and sum */
+ int x = 0;
+ svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
+ do
+ {
+ auto vec_elements = svld1(pg, in_ptr + x);
+ vec_elements = svsub_z(pg, vec_elements, vec_max);
+ if(is_log)
+ {
+ vec_elements = svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta)));
+ vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements));
+ }
+ else
+ {
+ vec_elements = wrapper::svexp_z(pg, svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta))));
+ vec_sum = svadd_m(pg, vec_sum, vec_elements);
+ }
+ svst1(pg, tmp_ptr + x, vec_elements);
+
+ x += wrapper::svcnt<ScalarType>();
+ pg = wrapper::svwhilelt<ScalarType>(x, input_width);
+ }
+ while(svptest_any(all_true_pg, pg));
+
+ /* Reduce sum */
+ sum = svaddv(all_true_pg, vec_sum);
+
+ if(is_log)
+ {
+ sum = static_cast<ScalarType>(std::log(sum));
+ }
+ else
+ {
+ sum = ScalarType(1) / sum;
+ }
+ }
+
+ /* Normalize exponentials */
+ {
+ /* Loop over row and compute softmax */
+ int x = 0;
+ svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
+ do
+ {
+ auto vec_in = svld1(pg, tmp_ptr + x);
+ auto normalized_value = wrapper::svdup_n(static_cast<ScalarType>(0));
+ if(is_log)
+ {
+ normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
+ }
+ else
+ {
+ normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
+ }
+ svst1(pg, out_ptr + x, normalized_value);
+
+ x += wrapper::svcnt<ScalarType>();
+ pg = wrapper::svwhilelt<ScalarType>(x, input_width);
+ }
+ while(svptest_any(all_true_pg, pg));
+ }
+ },
+ in_it, max_it, out_it);
+}
+
+template void sve_logits_1d_max<float>(const ITensor *in, ITensor *out, const Window &window);
+template void sve_logits_1d_max<float16_t>(const ITensor *in, ITensor *out, const Window &window);
+template void sve_logits_1d_max<qasymm8_t>(const ITensor *in, ITensor *out, const Window &window);
+template void sve_logits_1d_max<qasymm8_signed_t>(const ITensor *in, ITensor *out, const Window &window);
+
+template void sve_softmax_logits_1d_float<float>(const ITensor *in, const ITensor *max, void *const tmp,
+ ITensor *out, const float beta, bool is_log, const Window &window);
+template void sve_softmax_logits_1d_float<float16_t>(const ITensor *in, const ITensor *max, void *const tmp,
+ ITensor *out, const float beta, bool is_log, const Window &window);
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(ENABLE_SVE) */
diff --git a/src/core/cpu/kernels/softmax/impl/SVE/list.h b/src/core/cpu/kernels/softmax/impl/sve/list.h
index d558d7d193..7ddb358b8e 100644
--- a/src/core/cpu/kernels/softmax/impl/SVE/list.h
+++ b/src/core/cpu/kernels/softmax/impl/sve/list.h
@@ -24,7 +24,7 @@
#ifndef SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H
#define SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H
-#if defined(__ARM_FEATURE_SVE)
+#if defined(ENABLE_SVE)
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/SVEMath.h"
@@ -36,44 +36,11 @@ namespace arm_compute
namespace cpu
{
template <typename ScalarType>
-void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window)
-{
- const auto all_true_pg = wrapper::svptrue<ScalarType>();
- const auto window_start_x = static_cast<int>(window.x().start());
- const auto window_end_x = static_cast<int>(window.x().end());
-
- Window win{ window };
- win.set(Window::DimX, Window::Dimension(0, 1, 1));
- Iterator input(in, win);
- Iterator output(out, win);
-
- execute_window_loop(win, [&](const Coordinates &)
- {
- // Get pointers
- const auto in_ptr = reinterpret_cast<const ScalarType *>(input.ptr());
- const auto out_ptr = reinterpret_cast<ScalarType *>(output.ptr());
-
- // Init max value
- auto vec_max = wrapper::svdup_n(support::cpp11::lowest<ScalarType>());
+void sve_logits_1d_max(const ITensor *in, ITensor *out, const Window &window);
- int x = window_start_x;
- svbool_t pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
- do
- {
- const auto current_value = svld1(pg, in_ptr + x);
- vec_max = svmax_m(pg, vec_max, current_value);
-
- x += wrapper::svcnt<ScalarType>();
- pg = wrapper::svwhilelt<ScalarType>(x, window_end_x);
- }
- while(svptest_any(all_true_pg, pg));
-
- auto max_val = svmaxv(all_true_pg, vec_max);
-
- *out_ptr = max_val;
- },
- input, output);
-}
+template <typename ScalarType>
+void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp,
+ ITensor *out, const float beta, bool is_log, const Window &window);
#if defined(__ARM_FEATURE_SVE2)
template <typename ScalarType>
@@ -249,105 +216,8 @@ void sve_softmax_logits_1d_quantized(const ITensor *in, const ITensor *max, void
in_it, max_it, out_it);
}
#endif /* defined(__ARM_FEATURE_SVE2) */
-
-template <typename ScalarType>
-void sve_softmax_logits_1d_float(const ITensor *in, const ITensor *max, void *const tmp,
- ITensor *out, const float beta, bool is_log, const Window &window)
-{
- const int start_x = in->info()->valid_region().anchor.x();
- const int input_width = in->info()->valid_region().shape.x();
-
- Iterator in_it(in, window);
- Iterator max_it(max, window);
- Iterator out_it(out, window);
-
- const auto all_true_pg = wrapper::svptrue<ScalarType>();
-
- execute_window_loop(window, [&](const Coordinates &)
- {
- /* Get pointers */
- const auto in_ptr = reinterpret_cast<const ScalarType *>(in_it.ptr()) + start_x;
- const auto out_ptr = reinterpret_cast<ScalarType *>(out_it.ptr()) + start_x;
- const auto tmp_ptr = reinterpret_cast<ScalarType *>(tmp);
-
- ScalarType sum{ 0 };
-
- /* Compute exponentials and sum */
- {
- /* Get max value */
- const auto max_val = *reinterpret_cast<const ScalarType *>(max_it.ptr());
- const auto vec_max = wrapper::svdup_n(max_val);
-
- /* Init sum to zero */
- auto vec_sum = wrapper::svdup_n(static_cast<ScalarType>(0));
-
- /* Loop over row and compute exponentials and sum */
- int x = 0;
- svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
- do
- {
- auto vec_elements = svld1(pg, in_ptr + x);
- vec_elements = svsub_z(pg, vec_elements, vec_max);
- if(is_log)
- {
- vec_elements = svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta)));
- vec_sum = svadd_m(pg, vec_sum, wrapper::svexp_z(pg, vec_elements));
- }
- else
- {
- vec_elements = wrapper::svexp_z(pg, svmul_z(pg, vec_elements, wrapper::svdup_n(static_cast<ScalarType>(beta))));
- vec_sum = svadd_m(pg, vec_sum, vec_elements);
- }
- svst1(pg, tmp_ptr + x, vec_elements);
-
- x += wrapper::svcnt<ScalarType>();
- pg = wrapper::svwhilelt<ScalarType>(x, input_width);
- }
- while(svptest_any(all_true_pg, pg));
-
- /* Reduce sum */
- sum = svaddv(all_true_pg, vec_sum);
-
- if(is_log)
- {
- sum = static_cast<ScalarType>(std::log(sum));
- }
- else
- {
- sum = ScalarType(1) / sum;
- }
- }
-
- /* Normalize exponentials */
- {
- /* Loop over row and compute softmax */
- int x = 0;
- svbool_t pg = wrapper::svwhilelt<ScalarType>(x, input_width);
- do
- {
- auto vec_in = svld1(pg, tmp_ptr + x);
- auto normalized_value = wrapper::svdup_n(static_cast<ScalarType>(0));
- if(is_log)
- {
- normalized_value = svsub_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
- }
- else
- {
- normalized_value = svmul_z(pg, vec_in, wrapper::svdup_n(static_cast<ScalarType>(sum)));
- }
- svst1(pg, out_ptr + x, normalized_value);
-
- x += wrapper::svcnt<ScalarType>();
- pg = wrapper::svwhilelt<ScalarType>(x, input_width);
- }
- while(svptest_any(all_true_pg, pg));
- }
- },
- in_it, max_it, out_it);
-}
-
} // namespace cpu
} // namespace arm_compute
-#endif /* defined(__ARM_FEATURE_SVE) */
+#endif /* defined(ENABLE_SVE) */
#endif /* SRC_CORE_SVE_KERNELS_SOFTMAX_LIST_H */