aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2020-11-22 00:49:42 +0000
committerMichalis Spyrou <michalis.spyrou@arm.com>2020-12-14 16:02:26 +0000
commitaa51a5ba9a3f05be08b94859b53c398edee5d2e3 (patch)
treeb28829b483421b210cd7c8a256c7feafed736b36 /src/core/NEON/kernels
parent3737c7934da929003bda446291489cf352e43751 (diff)
downloadComputeLibrary-aa51a5ba9a3f05be08b94859b53c398edee5d2e3.tar.gz
COMPMID-3870: Create ActivationLayer SVE/SVE2
Adds support for ActivationLayer for SVE and SVE2. Datatypes supported: *FP32 *FP16 *QASYMM8 *QASYMM8_SIGNED *QSYMM16 Change-Id: Ia3583891795cda4ca2f9fa27c440731a5c27710d Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4566 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--src/core/NEON/kernels/NEActivationLayerKernel.cpp33
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/fp16.cpp (renamed from src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/fp32.cpp (renamed from src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp (renamed from src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp (renamed from src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp (renamed from src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp)0
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/fp16.cpp132
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/fp32.cpp133
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp185
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp183
-rw-r--r--src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp122
-rw-r--r--src/core/NEON/kernels/activation/impl/list.h5
-rw-r--r--src/core/NEON/kernels/floor/impl/NEON/fp16.cpp (renamed from src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp)0
-rw-r--r--src/core/NEON/kernels/floor/impl/NEON/fp32.cpp (renamed from src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp)0
14 files changed, 792 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
index d969fd8e38..f215787bf6 100644
--- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp
@@ -56,6 +56,18 @@ struct ActivationKernel
static const ActivationKernel available_kernels[] =
{
+#if defined(__ARM_FEATURE_SVE)
+ {
+ "fp16_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
+ REGISTER_FP16_SVE(arm_compute::cpu::fp16_sve_activation)
+ },
+ {
+ "fp32_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
+ REGISTER_FP32_SVE(arm_compute::cpu::fp32_sve_activation)
+ },
+#else /* !defined(__ARM_FEATURE_SVE) */
{
"fp16_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::F16; },
@@ -66,6 +78,25 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::F32; },
REGISTER_FP32_NEON(arm_compute::cpu::fp32_neon_activation)
},
+#endif /* defined(__ARM_FEATURE_SVE) */
+
+#if defined(__ARM_FEATURE_SVE2) /* defined(__ARM_FEATURE_SVE2) */
+ {
+ "qasymm8_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
+ REGISTER_QASYMM8_SVE(arm_compute::cpu::qasymm8_sve_activation)
+ },
+ {
+ "qasymm8_signed_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8_SIGNED; },
+ REGISTER_QASYMM8_SIGNED_SVE(arm_compute::cpu::qasymm8_signed_sve_activation)
+ },
+ {
+ "qsymm16_sve_activation",
+ [](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
+ REGISTER_QSYMM16_SVE(arm_compute::cpu::qsymm16_sve_activation)
+ },
+#else /* !defined(__ARM_FEATURE_SVE2) */
{
"qasymm8_neon_activation",
[](const ActivationSelectorData & data) { return data.dt == DataType::QASYMM8; },
@@ -81,6 +112,7 @@ static const ActivationKernel available_kernels[] =
[](const ActivationSelectorData & data) { return data.dt == DataType::QSYMM16; },
REGISTER_QSYMM16_NEON(arm_compute::cpu::qsymm16_neon_activation)
},
+#endif /* defined(__ARM_FEATURE_SVE2) */
};
const ActivationKernel *get_implementation(const ActivationSelectorData &data)
@@ -159,7 +191,6 @@ std::pair<Status, Window> validate_and_configure_window(const ITensorInfo *input
// Output auto inizialitation if not yet initialized
auto_init_if_empty(*output, *input->clone());
- // NEActivationLayerKernel doesn't need padding so update_window_and_padding() can be skipped
Coordinates coord;
coord.set_num_dimensions(output->num_dimensions());
output->set_valid_region(ValidRegion(coord, output->tensor_shape()));
diff --git a/src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
index 58e1cfcf23..58e1cfcf23 100644
--- a/src/core/NEON/kernels/activation/impl/fp16_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
index 610db05224..610db05224 100644
--- a/src/core/NEON/kernels/activation/impl/fp32_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/fp32.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
index 7b26441824..7b26441824 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qasymm8.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
index c616c5e27d..c616c5e27d 100644
--- a/src/core/NEON/kernels/activation/impl/qasymm8_signed_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qasymm8_signed.cpp
diff --git a/src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp b/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
index 0bef807db9..0bef807db9 100644
--- a/src/core/NEON/kernels/activation/impl/qsymm16_neon_activation.cpp
+++ b/src/core/NEON/kernels/activation/impl/NEON/qsymm16.cpp
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp b/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
new file mode 100644
index 0000000000..8d6f4f2351
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/fp16.cpp
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE)
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void fp16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const auto const_1 = svdup_n_f16(1.f);
+ const auto const_0 = svdup_n_f16(0.f);
+ const auto const_6 = svdup_n_f16(6.f);
+ const auto const_3 = svdup_n_f16(3.f);
+ const auto const_inv_6 = svdup_n_f16(0.166666667f);
+
+ const auto va = svdup_n_f16(act_info.a());
+ const auto vb = svdup_n_f16(act_info.b());
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float16_t *>(output.ptr());
+
+ svfloat16_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_f16(pg, input_ptr + x);
+ switch(act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f16_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, svneg_f16_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f16_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f16_z(pg, va, svmax_f16_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f16_z(pg, svmul_f16_z(pg, svmin_f16_z(pg, vin, const_0), va), svmax_f16_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svlog_f16_z(pg, svadd_f16_z(pg, const_1, svexp_f16_z(pg, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f16(svcmpgt_f16(pg, vin, const_0), vin, svmul_f16_z(pg, va, svsub_f16_z(pg, svexp_f16_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f16_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f16_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f16_z(pg, va, svtanh_f16_z(pg, svmul_f16_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f16_z(pg, vin, svmul_f16_z(pg, const_inv_6, svmin_f16_z(pg, const_6, svmax_f16_z(pg, const_0, svadd_f16_z(pg, vin, const_3)))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f16(pg, output_ptr + x, tmp);
+
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif // __ARM_FEATURE_SVE \ No newline at end of file
diff --git a/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp b/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
new file mode 100644
index 0000000000..2c276028a0
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/fp32.cpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/NEON/SVEMath.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE)
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void fp32_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const auto const_1 = svdup_n_f32(1.f);
+ const auto const_0 = svdup_n_f32(0.f);
+ const auto const_6 = svdup_n_f32(6.f);
+ const auto const_3 = svdup_n_f32(3.f);
+ const auto const_inv_6 = svdup_n_f32(0.166666667f);
+
+ const auto va = svdup_n_f32(act_info.a());
+ const auto vb = svdup_n_f32(act_info.b());
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const float *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<float *>(output.ptr());
+
+ svfloat32_t tmp;
+
+ // Compute S elements per iteration
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b32(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_f32(pg, input_ptr + x);
+ switch(act)
+ {
+ case ActivationLayerInfo::ActivationFunction::ABS:
+ tmp = svabs_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LINEAR:
+ tmp = svmla_f32_z(pg, vb, va, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::LOGISTIC:
+ tmp = svinv_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, svneg_f32_z(pg, vin))));
+ break;
+ case ActivationLayerInfo::ActivationFunction::RELU:
+ tmp = svmax_f32_z(pg, const_0, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, const_0, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU:
+ tmp = svmin_f32_z(pg, va, svmax_f32_z(pg, vb, vin));
+ break;
+ case ActivationLayerInfo::ActivationFunction::LEAKY_RELU:
+ tmp = svadd_f32_z(pg, svmul_f32_z(pg, svmin_f32_z(pg, vin, const_0), va), svmax_f32_z(pg, vin, const_0));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
+ tmp = svlog_f32_z(pg, svadd_f32_z(pg, const_1, svexp_f32_z(pg, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::ELU:
+ tmp = svsel_f32(svcmpgt_f32(pg, vin, const_0), vin, svmul_f32_z(pg, va, svsub_f32_z(pg, svexp_f32_z(pg, vin), const_1)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQRT:
+ tmp = svsqrt_f32_z(pg, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::SQUARE:
+ tmp = svmul_f32_z(pg, vin, vin);
+ break;
+ case ActivationLayerInfo::ActivationFunction::TANH:
+ tmp = svmul_f32_z(pg, va, svtanh_f32_z(pg, svmul_f32_z(pg, vb, vin)));
+ break;
+ case ActivationLayerInfo::ActivationFunction::IDENTITY:
+ tmp = vin;
+ break;
+ case ActivationLayerInfo::ActivationFunction::HARD_SWISH:
+ tmp = svmul_f32_z(pg, vin, svmul_f32_z(pg, const_inv_6, svmin_f32_z(pg, const_6, svmax_f32_z(pg, const_0, svadd_f32_z(pg, vin, const_3)))));
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+ svst1_f32(pg, output_ptr + x, tmp);
+
+ x += svcntw();
+ pg = svwhilelt_b32(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b32(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif // __ARM_FEATURE_SVE \ No newline at end of file
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp b/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
new file mode 100644
index 0000000000..a49a562c84
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qasymm8.cpp
@@ -0,0 +1,185 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <arm_neon.h>
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEAsymm.h"
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qasymm8_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto va = svdup_n_u8(quantize_qasymm8(act_info.a(), qi_in));
+ const auto vb = svdup_n_u8(quantize_qasymm8(act_info.b(), qi_in));
+ const auto const_0 = quantize_qasymm8(0.f, qi_in);
+ const auto vconst_0 = svdup_n_u8(const_0);
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+ const auto const_6_f32 = svdup_n_f32(6.f);
+ const auto const_0_f32 = svdup_n_f32(0.f);
+ const auto const_3_f32 = svdup_n_f32(3.f);
+ const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
+
+ // Initialise scale/offset for re-quantization
+ bool requant = true;
+ if(qi_in.scale == qi_out.scale && qi_in.offset == qi_out.offset)
+ {
+ requant = false;
+ }
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ auto vs = svdup_n_f32(s);
+ auto vo = svdup_n_f32(o);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr());
+
+ svuint8_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b8(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_u8(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = svmax_u8_z(pg, vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_u8_z(pg, va, svmax_u8_z(pg, vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_u8_z(pg, va, svmax_u8_z(pg, vb, vin));
+ // Re-quantize to new output space
+ tmp = svmla_qasymm8_z(pg, tmp, vs, vo);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 1))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 2))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 3))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 1), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 2), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 3), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 0), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_z(pg, tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_u8(pg, output_ptr + x, tmp);
+
+ x += svcntb();
+ pg = svwhilelt_b8(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b8(), pg));
+
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */ \ No newline at end of file
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp b/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
new file mode 100644
index 0000000000..f34bee88fc
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qasymm8_signed.cpp
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/Window.h"
+#include "src/core/NEON/wrapper/wrapper.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEAsymm.h"
+#include "src/core/NEON/SVEMath.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qasymm8_signed_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto va = svdup_n_s8(quantize_qasymm8_signed(act_info.a(), qi_in));
+ const auto vb = svdup_n_s8(quantize_qasymm8_signed(act_info.b(), qi_in));
+ const auto const_0 = quantize_qasymm8_signed(0.f, qi_in);
+ const auto vconst_0 = svdup_n_s8(const_0);
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+ const auto const_6_f32 = svdup_n_f32(6.f);
+ const auto const_0_f32 = svdup_n_f32(0.f);
+ const auto const_3_f32 = svdup_n_f32(3.f);
+ const auto const_inv_6_f32 = svdup_n_f32(0.166666667f);
+
+ // Initialise scale/offset for re-quantization
+ bool requant = true;
+ if(qi_in.scale == qi_out.scale && qi_in.offset == qi_out.offset)
+ {
+ requant = false;
+ }
+ float s = qi_in.scale / qi_out.scale;
+ float o = -qi_in.offset * s + qi_out.offset;
+ auto vs = svdup_n_f32(s);
+ auto vo = svdup_n_f32(o);
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const int8_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int8_t *>(output.ptr());
+
+ svint8_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b8(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_s8(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::RELU)
+ {
+ // Perform activation
+ tmp = svmax_s8_z(pg, vconst_0, vin);
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vconst_0, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+ {
+ // Perform activation
+ tmp = svmin_s8_z(pg, va, svmax_s8_z(pg, vb, vin));
+ // Re-quantize to new output space
+ tmp = requant ? svmla_qasymm8_signed_z(pg, tmp, vs, vo) : tmp;
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 1))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 2))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget4_f32(vin_deq, 3))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 1), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 2), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget4_f32(vin_deq, 3), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::HARD_SWISH)
+ {
+ // De-quantize
+ const auto vin_deq = svdequantize_z(pg, vin, qi_in);
+ // Perform activation
+ const svfloat32x4_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, svget4_f32(vin_deq, 0), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 0), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 1), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 1), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 2), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 2), const_3_f32))))),
+ svmul_f32_z(pg, svget4_f32(vin_deq, 3), svmul_f32_z(pg, const_inv_6_f32, svmin_f32_z(pg, const_6_f32, svmax_f32_z(pg, const_0_f32, svadd_f32_z(pg, svget4_f32(vin_deq, 3), const_3_f32))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_signed_z(pg, tmp_dep, qi_out);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_s8(pg, output_ptr + x, tmp);
+
+ x += svcntb();
+ pg = svwhilelt_b8(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b8(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
diff --git a/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp b/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
new file mode 100644
index 0000000000..1432e3bbdf
--- /dev/null
+++ b/src/core/NEON/kernels/activation/impl/SVE/qsymm16.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright (c) 2020 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Helpers.h"
+#include "arm_compute/core/ITensorPack.h"
+#include "arm_compute/core/Window.h"
+#include "arm_compute/core/experimental/Types.h"
+#include "src/core/common/StdTypes.h"
+#include "src/core/common/Validate.h"
+
+#include <cmath>
+#include <cstddef>
+
+#if defined(__ARM_FEATURE_SVE2)
+#include "src/core/NEON/SVEMath.h"
+#include "src/core/NEON/SVESymm.h"
+#include <arm_sve.h>
+
+namespace arm_compute
+{
+namespace cpu
+{
+void qsymm16_sve_activation(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
+{
+ const auto window_start_x = static_cast<int>(window.x().start());
+ const auto window_end_x = static_cast<int>(window.x().end());
+ const ActivationLayerInfo::ActivationFunction act = act_info.activation();
+
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+
+ const UniformQuantizationInfo qi_in = src->info()->quantization_info().uniform();
+ const UniformQuantizationInfo qi_out = dst->info()->quantization_info().uniform();
+ const auto vconst_1 = svdup_n_f32(1.f);
+ const auto va_f32 = svdup_n_f32(act_info.a());
+ const auto vb_f32 = svdup_n_f32(act_info.b());
+
+ execute_window_loop(win_collapsed, [&](const Coordinates &)
+ {
+ const auto input_ptr = reinterpret_cast<const int16_t *>(input.ptr());
+ const auto output_ptr = reinterpret_cast<int16_t *>(output.ptr());
+
+ svint16_t tmp;
+
+ int x = window_start_x;
+ svbool_t pg = svwhilelt_b16(x, window_end_x);
+ do
+ {
+ const auto vin = svld1_s16(pg, input_ptr + x);
+ if(act == ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep =
+ {
+ { {
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 0))))),
+ svdiv_f32_z(pg, vconst_1, svadd_f32_z(pg, vconst_1, svexp_f32_z(pg, svneg_f32_z(pg, svget2_f32(vin_deq, 1))))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else if(act == ActivationLayerInfo::ActivationFunction::TANH)
+ {
+ // De-quantize
+ auto vin_deq = svdequantize_qsymm16_z(pg, vin, qi_in.scale);
+ // Perform activation
+ const svfloat32x2_t tmp_dep =
+ {
+ { {
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 0), vb_f32))),
+ svmul_f32_z(pg, va_f32, svtanh_f32_z(pg, svmul_f32_z(pg, svget2_f32(vin_deq, 1), vb_f32))),
+ }
+ }
+ };
+ // Re-quantize to new output space
+ tmp = svquantize_qsymm16_z(pg, tmp_dep, qi_out.scale);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unsupported activation function");
+ }
+
+ svst1_s16(pg, output_ptr + x, tmp);
+
+ x += svcnth();
+ pg = svwhilelt_b16(x, window_end_x);
+
+ }
+ while(svptest_any(svptrue_b16(), pg));
+ },
+ input, output);
+}
+} // namespace cpu
+} // namespace arm_compute
+#endif /* defined(__ARM_FEATURE_SVE2) */
diff --git a/src/core/NEON/kernels/activation/impl/list.h b/src/core/NEON/kernels/activation/impl/list.h
index 3b48ee3e22..db6c5b21b8 100644
--- a/src/core/NEON/kernels/activation/impl/list.h
+++ b/src/core/NEON/kernels/activation/impl/list.h
@@ -32,10 +32,15 @@ namespace cpu
void func_name(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
DECLARE_ACTIVATION_KERNEL(qasymm8_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qasymm8_sve_activation);
DECLARE_ACTIVATION_KERNEL(qasymm8_signed_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qasymm8_signed_sve_activation);
DECLARE_ACTIVATION_KERNEL(qsymm16_neon_activation);
+DECLARE_ACTIVATION_KERNEL(qsymm16_sve_activation);
DECLARE_ACTIVATION_KERNEL(fp16_neon_activation);
+DECLARE_ACTIVATION_KERNEL(fp16_sve_activation);
DECLARE_ACTIVATION_KERNEL(fp32_neon_activation);
+DECLARE_ACTIVATION_KERNEL(fp32_sve_activation);
#undef DECLARE_ACTIVATION_KERNEL
} // namespace cpu
diff --git a/src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp b/src/core/NEON/kernels/floor/impl/NEON/fp16.cpp
index 4f56ca9daf..4f56ca9daf 100644
--- a/src/core/NEON/kernels/floor/impl/fp16_neon_floor.cpp
+++ b/src/core/NEON/kernels/floor/impl/NEON/fp16.cpp
diff --git a/src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp b/src/core/NEON/kernels/floor/impl/NEON/fp32.cpp
index 3f4b14b3e5..3f4b14b3e5 100644
--- a/src/core/NEON/kernels/floor/impl/fp32_neon_floor.cpp
+++ b/src/core/NEON/kernels/floor/impl/NEON/fp32.cpp