aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/activation/generic/sve/fp16.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve/fp16.cpp')
-rw-r--r--src/cpu/kernels/activation/generic/sve/fp16.cpp148
1 files changed, 79 insertions, 69 deletions
diff --git a/src/cpu/kernels/activation/generic/sve/fp16.cpp b/src/cpu/kernels/activation/generic/sve/fp16.cpp
index 4757c60d8f..97399e01e0 100644
--- a/src/cpu/kernels/activation/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/activation/generic/sve/fp16.cpp
@@ -29,11 +29,11 @@
#include "arm_compute/core/Window.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
-#include <cmath>
-#include <cstddef>
-
#include "src/core/NEON/SVEMath.h"
+
#include <arm_sve.h>
+#include <cmath>
+#include <cstddef>
namespace arm_compute
{
@@ -59,77 +59,87 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer
const auto va = svdup_n_f16(act_info.a());
const auto vb = svdup_n_f16(act_info.b());
- execute_window_loop(win_collapsed, [&](const Coordinates &)
- {
- const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
- svfloat16_t tmp;
+ svfloat16_t tmp;
- int x = window_start_x;
- svbool_t pg = svwhilelt_b16(x, window_end_x);
- do
- {
- const auto vin = svld1_f16(pg, input_ptr + x);
- switch(act)
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
{
- case ActivationLayerInfo::ActivationFunction::ABS:
- tmp = svabs_f16_z(pg, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LINEAR:
- tmp = svmla_f16_z(pg, vb, va, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- tmp = svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, vin))));
- break;
- case ActivationLayerInfo::ActivationFunction::RELU:
- tmp = svmax_f16_z(pg, const_0, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
- tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, const_0, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
- tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, vb, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- tmp = svadd_f16_z(pg, svmul_f16_z(pg, svmin_f16_z(pg, vin, const_0), va), svmax_f16_z(pg, vin, const_0));
- break;
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- tmp = svlog_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, vin)));
- break;
- case ActivationLayerInfo::ActivationFunction::ELU:
- tmp = svsel_f16(svcmpgt_f16(pg, vin, const_0), vin, svmul_f16_z(pg, va, svsub_f16_z(pg, svexp_f16_z(pg, vin), const_1)));
- break;
- case ActivationLayerInfo::ActivationFunction::SQRT:
- tmp = svsqrt_f16_z(pg, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::SQUARE:
- tmp = svmul_f16_z(pg, vin, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::TANH:
- tmp = svmul_f16_z(pg, va, svtanh_f16_z(pg, svmul_f16_z(pg, vb, vin)));
- break;
- case ActivationLayerInfo::ActivationFunction::IDENTITY:
- tmp = vin;
- break;
- case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- tmp = svmul_f16_z(pg, vin, svmul_f16_z(pg, const_inv_6, svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3)))));
- break;
- case ActivationLayerInfo::ActivationFunction::SWISH:
- tmp = svmul_f16_z(pg, vin, svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, svmul_f16_z(pg, va, vin))))));
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- svst1_f16(pg, output_ptr + x, tmp);
+ const auto vin = svld1_f16(pg, input_ptr + x);
+ switch (act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f16_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f16_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f16_z(pg, svmul_f16_z(pg, svmin_f16_z(pg, vin, const_0), va),
+ svmax_f16_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svlog_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f16(svcmpgt_f16(pg, vin, const_0), vin,
+ svmul_f16_z(pg, va, svsub_f16_z(pg, svexp_f16_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f16_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f16_z(pg, va, svtanh_f16_z(pg, svmul_f16_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f16_z(
+ pg, vin,
+ svmul_f16_z(
+ pg, const_inv_6,
+ svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3)))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ tmp = svmul_f16_z(
+ pg, vin,
+ svinv_f16_z(pg, svadd_f16_z(pg, const_1,
+ svexp_f16_z(pg, svneg_f16_z(pg, svmul_f16_z(pg, va, vin))))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f16(pg, output_ptr + x, tmp);
- x += svcnth();
- pg = svwhilelt_b16(x, window_end_x);
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
- }
- while(svptest_any(svptrue_b16(), pg));
- },
- input, output);
+ } while (svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
}
} // namespace cpu
} // namespace arm_compute