From 66704133478dcb2065e6313590187315de74ec5f Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Fri, 7 Oct 2022 15:58:05 +0100 Subject: Fix LUT-based activation layer * Use the window instead of the tensor shape to determine the number of elements in the x-dimension. * Remove the LUT implementation in 32-bit build. Resolves: COMPMID-5641 Signed-off-by: Viet-Hoa Do Change-Id: I0a79aa38d8f6a105ad01785bd94571f5a2ecb348 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8380 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello --- src/cpu/kernels/activation/generic/neon/lut.cpp | 14 ++++---------- src/cpu/kernels/activation/list.h | 3 +++ 2 files changed, 7 insertions(+), 10 deletions(-) (limited to 'src/cpu/kernels') diff --git a/src/cpu/kernels/activation/generic/neon/lut.cpp b/src/cpu/kernels/activation/generic/neon/lut.cpp index ddf7f1bd61..b5c29ce07b 100644 --- a/src/cpu/kernels/activation/generic/neon/lut.cpp +++ b/src/cpu/kernels/activation/generic/neon/lut.cpp @@ -411,11 +411,11 @@ void substitute_bytes_neon( #endif // __aarch64__ } // namespace +#ifdef __aarch64__ void neon_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) { ARM_COMPUTE_ERROR_ON(!ActivationLayerInfo::is_lut_supported(act_info.activation(), src->info()->data_type())); -#ifdef __aarch64__ - const int window_step_x = src->info()->tensor_shape().x(); + const auto window_end_x = window.x().end(); Window win_collapsed = window.collapse_if_possible(window, Window::DimZ); win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1)); Iterator input(src, win_collapsed); @@ -424,16 +424,10 @@ void neon_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLa { const auto input_ptr = reinterpret_cast<const uint8_t *>(input.ptr()); auto output_ptr = reinterpret_cast<uint8_t *>(output.ptr()); - substitute_bytes_neon(act_info.lut().data(), 1u, window_step_x, &input_ptr, &output_ptr); + 
substitute_bytes_neon(act_info.lut().data(), 1u, window_end_x, &input_ptr, &output_ptr); }, input, output); -#else // #ifdef __aarch64__ - ARM_COMPUTE_UNUSED(src); - ARM_COMPUTE_UNUSED(dst); - ARM_COMPUTE_UNUSED(act_info); - ARM_COMPUTE_UNUSED(window); - ARM_COMPUTE_ERROR("LUT Only supported in aarch64."); -#endif // __aarch64__ } +#endif // __aarch64__ } // namespace cpu } // namespace arm_compute diff --git a/src/cpu/kernels/activation/list.h b/src/cpu/kernels/activation/list.h index 3850d4de6b..c0a2446748 100644 --- a/src/cpu/kernels/activation/list.h +++ b/src/cpu/kernels/activation/list.h @@ -31,7 +31,10 @@ namespace cpu #define DECLARE_ACTIVATION_KERNEL(func_name) \ void func_name(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window) +#ifdef __aarch64__ DECLARE_ACTIVATION_KERNEL(neon_q8_activation_lut); +#endif // __aarch64__ + DECLARE_ACTIVATION_KERNEL(neon_qasymm8_activation); DECLARE_ACTIVATION_KERNEL(sve2_qasymm8_activation); DECLARE_ACTIVATION_KERNEL(neon_qasymm8_signed_activation); -- cgit v1.2.1