diff options
author | Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com> | 2023-12-05 14:27:31 +0000 |
---|---|---|
committer | Mohmun02 <MohammedSuhail.Munshi@arm.com> | 2024-01-10 09:56:39 +0000 |
commit | 7467ba8fac0afb19d750b3bdda9ba95002634038 (patch) | |
tree | e47d4989251f03d13590e6b22d9bd228fd1efe34 /src/cpu/kernels/activation/generic | |
parent | 7fe7791468978429ab02343a8485b51b39832027 (diff) | |
download | ComputeLibrary-7467ba8fac0afb19d750b3bdda9ba95002634038.tar.gz |
Use a lookup table for FP16 activation
- Enables the FP16 LUT for logistic activation.
- Adds LUTManager to reuse LUTs where appropriate.
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: I94667b63b452a8e58a1eb59cb0b5866178954523
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10864
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/cpu/kernels/activation/generic')
-rw-r--r-- | src/cpu/kernels/activation/generic/sve/fp16.cpp | 29 |
1 file changed, 28 insertions, 1 deletion
```diff
diff --git a/src/cpu/kernels/activation/generic/sve/fp16.cpp b/src/cpu/kernels/activation/generic/sve/fp16.cpp
index 97399e01e0..19d9126556 100644
--- a/src/cpu/kernels/activation/generic/sve/fp16.cpp
+++ b/src/cpu/kernels/activation/generic/sve/fp16.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023 Arm Limited.
+ * Copyright (c) 2020-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,6 +30,7 @@
 #include "arm_compute/function_info/ActivationLayerInfo.h"
 
 #include "src/core/NEON/SVEMath.h"
+#include "src/cpu/kernels/lut/list.h"
 
 #include <arm_sve.h>
 #include <cmath>
@@ -141,6 +142,32 @@ void sve_fp16_activation(const ITensor *src, ITensor *dst, const ActivationLayer
         },
         input, output);
 }
+
+void sve_fp16_activation_lut(const ITensor *src,
+                             ITensor *dst,
+                             const ActivationLayerInfo &act_info,
+                             const Window &window)
+{
+    ARM_COMPUTE_ERROR_ON(src->info()->data_type() != DataType::F16);
+    const auto window_start_x = window.x().start();
+    const auto window_end_x = window.x().end();
+    const auto size = window_end_x - window_start_x;
+    Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
+    win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+    Iterator input(src, win_collapsed);
+    Iterator output(dst, win_collapsed);
+    execute_window_loop(
+        win_collapsed,
+        [&](const Coordinates &)
+        {
+            const auto input_ptr = reinterpret_cast<const uint16_t *>(input.ptr());
+            auto output_ptr = reinterpret_cast<uint16_t *>(output.ptr());
+            lut_u16_sve(reinterpret_cast<const uint16_t *>(act_info.lut_fp16().data()), 1U /* num_strings (UNUSED) */,
+                        size, input_ptr + window_start_x, output_ptr + window_start_x);
+        },
+        input, output);
+}
 } // namespace cpu
 } // namespace arm_compute
 #endif /* defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) && defined(ENABLE_FP16_KERNELS) */
```