diff options
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve/fp32.cpp')
-rw-r--r-- | src/cpu/kernels/activation/generic/sve/fp32.cpp | 149 |
1 files changed, 80 insertions, 69 deletions
diff --git a/src/cpu/kernels/activation/generic/sve/fp32.cpp b/src/cpu/kernels/activation/generic/sve/fp32.cpp index 87f04c255a..d1b075d52c 100644 --- a/src/cpu/kernels/activation/generic/sve/fp32.cpp +++ b/src/cpu/kernels/activation/generic/sve/fp32.cpp @@ -26,13 +26,13 @@ #include "arm_compute/core/ITensorPack.h" #include "arm_compute/core/Window.h" #include "arm_compute/function_info/ActivationLayerInfo.h" + #include "src/core/NEON/SVEMath.h" +#include <arm_sve.h> #include <cmath> #include <cstddef> -#include <arm_sve.h> - namespace arm_compute { namespace cpu @@ -58,78 +58,89 @@ void sve_fp32_activation(const ITensor *src, ITensor *dst, const ActivationLayer const auto va = svdup_n_f32(act_info.a()); const auto vb = svdup_n_f32(act_info.b()); - execute_window_loop(win_collapsed, [&](const Coordinates &) - { - const auto input_ptr = reinterpret_cast<const float *>(input.ptr()); - const auto output_ptr = reinterpret_cast<float *>(output.ptr()); + execute_window_loop( + win_collapsed, + [&](const Coordinates &) + { + const auto input_ptr = reinterpret_cast<const float *>(input.ptr()); + const auto output_ptr = reinterpret_cast<float *>(output.ptr()); - svfloat32_t tmp; + svfloat32_t tmp; - // Compute S elements per iteration - int x = window_start_x; - svbool_t pg = svwhilelt_b32(x, window_end_x); - do - { - const auto vin = svld1_f32(pg, input_ptr + x); - switch(act) + // Compute S elements per iteration + int x = window_start_x; + svbool_t pg = svwhilelt_b32(x, window_end_x); + do { - case ActivationLayerInfo::ActivationFunction::ABS: - tmp = svabs_f32_z(pg, vin); - break; - case ActivationLayerInfo::ActivationFunction::LINEAR: - tmp = svmla_f32_z(pg, vb, va, vin); - break; - case ActivationLayerInfo::ActivationFunction::LOGISTIC: - tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin)))); - break; - case ActivationLayerInfo::ActivationFunction::RELU: - tmp = svmax_f32_z(pg, const_0, vin); - break; - case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: - tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin)); - break; - case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: - tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin)); - break; - case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: - tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va), svmax_f32_z(pg, vin, const_0)); - break; - case ActivationLayerInfo::ActivationFunction::SOFT_RELU: - tmp = svsel_f32(svcmpgt_f32(pg, vin, soft_relu_thresh), vin, svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin)))); - break; - case ActivationLayerInfo::ActivationFunction::ELU: - tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin, svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1))); - break; - case ActivationLayerInfo::ActivationFunction::SQRT: - tmp = svsqrt_f32_z(pg, vin); - break; - case ActivationLayerInfo::ActivationFunction::SQUARE: - tmp = svmul_f32_z(pg, vin, vin); - break; - case ActivationLayerInfo::ActivationFunction::TANH: - tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin))); - break; - case ActivationLayerInfo::ActivationFunction::IDENTITY: - tmp = vin; - break; - case ActivationLayerInfo::ActivationFunction::HARD_SWISH: - tmp = svmul_f32_z(pg, vin, svmul_f32_z(pg, const_inv_6, svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3))))); - break; - case ActivationLayerInfo::ActivationFunction::SWISH: - tmp = svmul_f32_z(pg, vin, svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin)))))); - break; - default: - ARM_COMPUTE_ERROR("Unsupported activation function"); - } - svst1_f32(pg, output_ptr + x, tmp); + const auto vin = svld1_f32(pg, input_ptr + x); + switch (act) + { + case ActivationLayerInfo::ActivationFunction::ABS: + tmp = svabs_f32_z(pg, vin); + break; + case ActivationLayerInfo::ActivationFunction::LINEAR: + tmp = svmla_f32_z(pg, vb, va, vin); + break; + case ActivationLayerInfo::ActivationFunction::LOGISTIC: + tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin)))); + break; + case ActivationLayerInfo::ActivationFunction::RELU: + tmp = svmax_f32_z(pg, const_0, vin); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin)); + break; + case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: + tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin)); + break; + case ActivationLayerInfo::ActivationFunction::LEAKY_RELU: + tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va), + svmax_f32_z(pg, vin, const_0)); + break; + case ActivationLayerInfo::ActivationFunction::SOFT_RELU: + tmp = svsel_f32(svcmpgt_f32(pg, vin, soft_relu_thresh), vin, + svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin)))); + break; + case ActivationLayerInfo::ActivationFunction::ELU: + tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin, + svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1))); + break; + case ActivationLayerInfo::ActivationFunction::SQRT: + tmp = svsqrt_f32_z(pg, vin); + break; + case ActivationLayerInfo::ActivationFunction::SQUARE: + tmp = svmul_f32_z(pg, vin, vin); + break; + case ActivationLayerInfo::ActivationFunction::TANH: + tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin))); + break; + case ActivationLayerInfo::ActivationFunction::IDENTITY: + tmp = vin; + break; + case ActivationLayerInfo::ActivationFunction::HARD_SWISH: + tmp = svmul_f32_z( + pg, vin, + svmul_f32_z( + pg, const_inv_6, + svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3))))); + break; + case ActivationLayerInfo::ActivationFunction::SWISH: + tmp = svmul_f32_z( + pg, vin, + svinv_f32_z(pg, svadd_f32_z(pg, const_1, + svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin)))))); + break; + default: + ARM_COMPUTE_ERROR("Unsupported activation function"); + } + svst1_f32(pg, output_ptr + x, tmp); - x += svcntw(); - pg = svwhilelt_b32(x, window_end_x); + x += svcntw(); + pg = svwhilelt_b32(x, window_end_x); - } - while(svptest_any(svptrue_b32(), pg)); - }, - input, output); + } while (svptest_any(svptrue_b32(), pg)); + }, + input, output); } } // namespace cpu } // namespace arm_compute |