aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/kernels/activation/generic/sve/fp16.cpp
diff options
context:
space:
mode:
author Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> 2023-12-05 14:27:31 +0000
committer Mohmun02 <MohammedSuhail.Munshi@arm.com> 2024-01-10 09:56:39 +0000
commit 7467ba8fac0afb19d750b3bdda9ba95002634038 (patch)
tree e47d4989251f03d13590e6b22d9bd228fd1efe34 /src/cpu/kernels/activation/generic/sve/fp16.cpp
parent 7fe7791468978429ab02343a8485b51b39832027 (diff)
download ComputeLibrary-7467ba8fac0afb19d750b3bdda9ba95002634038.tar.gz
Use look up table for fp16 activation
- Enables FP16 lut for logistic activation - Adds LUTManager to re-use lut where appropriate. Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> Change-Id: I94667b63b452a8e58a1eb59cb0b5866178954523 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10864 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/activation/generic/sve/fp16.cpp')
-rw-r--r-- src/cpu/kernels/activation/generic/sve/fp16.cpp 29
1 files changed, 28 insertions, 1 deletions
diff --git a/src/cpu/kernels/activation/generic/sve/fp16.cpp b/src/cpu/kernels/activation/generic/sve/fp16.cpp
index 97399e01e0..19d9126556 100644
--- a/src/cpu/kernels/activation/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/activation/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2023 Arm Limited.
+ * Copyright (c) 2020-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "src/core/NEON/SVEMath.h"
+#include "src/cpu/kernels/lut/list.h"
#include <arm_sve.h>
#include <cmath>
@@ -141,6 +142,32 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer
},
input, output);
}
+
+void sve_fp16_activation_lut(const ITensor *src, // FP16 activation applied through a lookup table: one lut_u16_sve call per row of the window.
+ ITensor *dst, // destination tensor; written through a uint16_t view below
+ const ActivationLayerInfo &act_info, // supplies the table via lut_fp16()
+ const Window &window) // execution window; its X range is processed manually inside the loop body
+{
+ ARM_COMPUTE_ERROR_ON(src->info()->data_type() != DataType::F16); // only the source data type is checked here
+ const auto window_start_x = window.x().start(); // first X position handled by this call
+ const auto window_end_x = window.x().end(); // end of the X range as reported by the window
+ const auto size = window_end_x - window_start_x; // extent of the X range, passed to lut_u16_sve as its size argument
+ Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); // NOTE(review): presumably merges dimensions from Z upwards when possible - confirm against Window
+ win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); // X reduced to Dimension(0, 1, 1); the X offset is applied by hand in the lambda
+
+ Iterator input(src, win_collapsed); // both iterators are built over the collapsed window
+ Iterator output(dst, win_collapsed);
+ execute_window_loop(
+ win_collapsed,
+ [&](const Coordinates &) // coordinates are unused; positions come from the iterators
+ {
+ const auto input_ptr = reinterpret_cast<const uint16_t *>(input.ptr()); // data is accessed as raw 16-bit words
+ auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+ lut_u16_sve(reinterpret_cast<const uint16_t *>(act_info.lut_fp16().data()), 1U /* num_strings (UNUSED) */, // NOTE(review): presumably the table is indexed by the 16-bit pattern of each input - confirm in lut_u16_sve
+ size, input_ptr + window_start_x, output_ptr + window_start_x); // both pointers are advanced by window_start_x elements
+ },
+ input, output);
+}
} // namespace cpu
} // namespace arm_compute
#endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */