about | summary | refs | log | tree | commit | diff
path: root/src/cpu/kernels/activation
diff options
context:
space:
mode:
Diffstat (limited to 'src/cpu/kernels/activation')
-rw-r--r-- src/cpu/kernels/activation/generic/sve/fp16.cpp | 29
-rw-r--r-- src/cpu/kernels/activation/list.h | 9
2 files changed, 33 insertions, 5 deletions
diff --git a/src/cpu/kernels/activation/generic/sve/fp16.cpp b/src/cpu/kernels/activation/generic/sve/fp16.cpp
index 97399e01e0..19d9126556 100644
--- a/src/cpu/kernels/activation/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/activation/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023 Arm Limited.
+ * Copyright (c) 2020-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "src/core/NEON/SVEMath.h"
+#include "src/cpu/kernels/lut/list.h"
#include <arm_sve.h>
#include <cmath>
@@ -141,6 +142,32 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer
},
input, output);
}
+
+void sve_fp16_activation_lut(const ITensor *src, // FP16 activation that delegates to lut_u16_sve with the table from act_info.lut_fp16()
+ ITensor *dst, // tensor the looked-up results are written into
+ const ActivationLayerInfo &act_info, // only lut_fp16() is read from it in this function
+ const Window &window) // execution window; its X range sets the offset and length handed to lut_u16_sve
+{
+ ARM_COMPUTE_ERROR_ON(src->info()->data_type() != DataType::F16); // flags an error when src is not F16 (dst is not checked here)
+ const auto window_start_x = window.x().start();
+ const auto window_end_x = window.x().end();
+ const auto size = window_end_x - window_start_x; // extent of the window's X range, passed to lut_u16_sve below
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); // X becomes (0, 1, 1): the loop no longer walks X, each callback covers the full X range itself
+
+ Iterator input(src, win_collapsed);
+ Iterator output(dst, win_collapsed);
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &) // coordinates are unused; positions come from the two iterators
+ {
+ const auto input_ptr = reinterpret_cast<const uint16_t *>(input.ptr()); // viewed as uint16_t; presumably raw FP16 bit patterns used as table indices — confirm against lut_u16_sve
+ auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+ lut_u16_sve(reinterpret_cast<const uint16_t *>(act_info.lut_fp16().data()), 1U /* num_strings (UNUSED) */,
+ size, input_ptr + window_start_x, output_ptr + window_start_x); // both pointers are advanced by the original X start, counted in uint16_t elements
+ },
+ input, output);
+}
} // namespace cpu
} // namespace arm_compute
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h
index 6550ddfeca..8c24adc3fe 100644
--- a/src/cpu/kernels/activation/list.h
+++ b/src/cpu/kernels/activation/list.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023 Arm Limited.
+ * Copyright (c) 2020-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef SRC_CORE_NEON_KERNELS_ACTIVATION_LIST_H
-#define SRC_CORE_NEON_KERNELS_ACTIVATION_LIST_H
+#ifndef ACL_SRC_CPU_KERNELS_ACTIVATION_LIST_H
+#define ACL_SRC_CPU_KERNELS_ACTIVATION_LIST_H
namespace arm_compute
{
@@ -42,6 +42,7 @@ DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_signed_activation);
DECLARE_ACTIVATION_KERNEL(neon_qsymm16_activation);
DECLARE_ACTIVATION_KERNEL(sve2_qsymm16_activation);
DECLARE_ACTIVATION_KERNEL(sve_fp16_activation);
+DECLARE_ACTIVATION_KERNEL(sve_fp16_activation_lut); // LUT-based SVE FP16 variant; defined in generic/sve/fp16.cpp
DECLARE_ACTIVATION_KERNEL(sve_fp32_activation);
DECLARE_ACTIVATION_KERNEL(neon_fp16_activation);
DECLARE_ACTIVATION_KERNEL(neon_fp32_activation);
@@ -50,4 +51,4 @@ DECLARE_ACTIVATION_KERNEL(neon_fp32_activation);
} // namespace cpu
} // namespace arm_compute
-#endif /* SRC_CORE_NEON_KERNELS_ACTIVATION_LIST_H */
+#endif // ACL_SRC_CPU_KERNELS_ACTIVATION_LIST_H