aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/activation/generic/sve/fp32.cpp
diff options
context:
space:
mode:
author Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> 2023-09-27 17:46:17 +0100
committer felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> 2023-09-28 12:08:05 +0000
commit afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree 03bc7d5a762099989b16a656fa8d397b490ed70e /src/cpu/kernels/activation/generic/sve/fp32.cpp
parent bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
download ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration file (not part of this delivery). clang-format version 14.0.6 is used. Exclusion list: - files with .cl extension - files that are not strictly C/C++ (e.g. Android.bp, SConscript, ...) And the following directories: - compute_kernel_writer/validation/ - tests/ - include/ - src/core/NEON/kernels/convolution/ - src/core/NEON/kernels/arm_gemm/ - src/core/NEON/kernels/arm_conv/ - data/ There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/. Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve/fp32.cpp')
-rw-r--r--src/cpu/kernels/activation/generic/sve/fp32.cpp149
1 files changed, 80 insertions, 69 deletions
diff --git a/src/cpu/kernels/activation/generic/sve/fp32.cpp b/src/cpu/kernels/activation/generic/sve/fp32.cpp
index 87f04c255a..d1b075d52c 100644
--- a/src/cpu/kernels/activation/generic/sve/fp32.cpp
+++ b/src/cpu/kernels/activation/generic/sve/fp32.cpp
@@ -26,13 +26,13 @@
#include "arm_compute/core/ITensorPack.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
#include <cmath>
#include <cstddef>
-#include <arm_sve.h>
-
namespace arm_compute
{
namespace cpu
@@ -58,78 +58,89 @@ void sve_fp32_activation(const ITensor *src, ITensor *dst, const ActivationLayer
const auto va = svdup_n_f32(act_info.a());
const auto vb = svdup_n_f32(act_info.b());
- execute_window_loop(win_collapsed, [&](const Coordinates &)
- {
- const auto input_ptr = reinterpret_cast<const float *>(input.ptr());
- const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
- svfloat32_t tmp;
+ svfloat32_t tmp;
- // Compute S elements per iteration
- int x = window_start_x;
- svbool_t pg = svwhilelt_b32(x, window_end_x);
- do
- {
- const auto vin = svld1_f32(pg, input_ptr + x);
- switch(act)
+ // Compute S elements per iteration
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b32(x, window_end_x);
+ do
{
- case ActivationLayerInfo::ActivationFunction::ABS:
- tmp = svabs_f32_z(pg, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LINEAR:
- tmp = svmla_f32_z(pg, vb, va, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::LOGISTIC:
- tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin))));
- break;
- case ActivationLayerInfo::ActivationFunction::RELU:
- tmp = svmax_f32_z(pg, const_0, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
- tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
- tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin));
- break;
- case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
- tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va), svmax_f32_z(pg, vin, const_0));
- break;
- case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
- tmp = svsel_f32(svcmpgt_f32(pg, vin, soft_relu_thresh), vin, svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin))));
- break;
- case ActivationLayerInfo::ActivationFunction::ELU:
- tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin, svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1)));
- break;
- case ActivationLayerInfo::ActivationFunction::SQRT:
- tmp = svsqrt_f32_z(pg, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::SQUARE:
- tmp = svmul_f32_z(pg, vin, vin);
- break;
- case ActivationLayerInfo::ActivationFunction::TANH:
- tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin)));
- break;
- case ActivationLayerInfo::ActivationFunction::IDENTITY:
- tmp = vin;
- break;
- case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
- tmp = svmul_f32_z(pg, vin, svmul_f32_z(pg, const_inv_6, svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3)))));
- break;
- case ActivationLayerInfo::ActivationFunction::SWISH:
- tmp = svmul_f32_z(pg, vin, svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin))))));
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported activation function");
- }
- svst1_f32(pg, output_ptr + x, tmp);
+ const auto vin = svld1_f32(pg, input_ptr + x);
+ switch (act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f32_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f32_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va),
+ svmax_f32_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svsel_f32(svcmpgt_f32(pg, vin, soft_relu_thresh), vin,
+ svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin,
+ svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f32_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f32_z(
+ pg, vin,
+ svmul_f32_z(
+ pg, const_inv_6,
+ svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3)))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SWISH:
+ tmp = svmul_f32_z(
+ pg, vin,
+ svinv_f32_z(pg, svadd_f32_z(pg, const_1,
+ svexp_f32_z(pg, svneg_f32_z(pg, svmul_f32_z(pg, va, vin))))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f32(pg, output_ptr + x, tmp);
- x += svcntw();
- pg = svwhilelt_b32(x, window_end_x);
+ x += svcntw();
+ pg = svwhilelt_b32(x, window_end_x);
- }
- while(svptest_any(svptrue_b32(), pg));
- },
- input, output);
+ } while (svptest_any(svptrue_b32(), pg));
+ },
+ input, output);
}
} // namespace cpu
} // namespace arm_compute