From 7fe7791468978429ab02343a8485b51b39832027 Mon Sep 17 00:00:00 2001
From: Sangwon Ha
Date: Tue, 2 Jan 2024 22:46:24 +0000
Subject: Prevent RELU from being processed through LUT in INT8

- For quantized RELU activation, de-quantization and re-quantization are
  not required; only a comparison against the quantization offset
  (zero point) is needed.

Resolves: COMPMID-6340

Change-Id: I574bd220f3d0d893b7f7c4819a883e2a131f61f4
Signed-off-by: Sangwon Ha
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10916
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Jakub Sujak
Benchmark: Arm Jenkins
---
 src/cpu/kernels/CpuActivationKernel.cpp         | 16 +++++++++-------
 src/cpu/kernels/activation/generic/neon/lut.cpp |  7 ++++---
 src/cpu/kernels/activation/generic/sve2/lut.cpp |  7 ++++---
 3 files changed, 17 insertions(+), 13 deletions(-)

diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp
index 50bf672d3c..3f3d72e8df 100644
--- a/src/cpu/kernels/CpuActivationKernel.cpp
+++ b/src/cpu/kernels/CpuActivationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,7 +49,8 @@ static const std::vector<CpuActivationKernel::ActivationKernel> available_kernel
      [](const ActivationDataTypeISASelectorData &data)
      {
          return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) &&
-                data.cpumodel == CPUModel::A510 && data.isa.sve2;
+                data.cpumodel == CPUModel::A510 && data.isa.sve2 &&
+                data.f != ActivationLayerInfo::ActivationFunction::RELU;
      },
      REGISTER_QASYMM8_SVE2(arm_compute::cpu::sve2_q8_activation_lut)},
 #endif // ARM_COMPUTE_ENABLE_SVE
@@ -57,7 +58,10 @@
     {// Neon LUT implementation takes precedence
      "neon_q8_activation_lut",
      [](const ActivationDataTypeISASelectorData &data)
-     { return data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED; },
+     {
+         return (data.dt == DataType::QASYMM8 || data.dt == DataType::QASYMM8_SIGNED) &&
+                data.f != ActivationLayerInfo::ActivationFunction::RELU;
+     },
      REGISTER_Q8_NEON(arm_compute::cpu::neon_q8_activation_lut)},
 #endif // __aarch64__
     {"sve2_qu8_activation",
@@ -214,9 +218,6 @@ void init_lut(ActivationLayerInfo::ActivationFunction act_func,
         case ActivationLayerInfo::ActivationFunction::LINEAR:
             tmp_f = a * tmp_f + b;
             break;
-        case ActivationLayerInfo::ActivationFunction::RELU:
-            tmp_f = std::max<>(0.f, tmp_f);
-            break;
         case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU:
             tmp_f = std::min<>(a, std::max(0.f, tmp_f));
             break;
@@ -278,7 +279,8 @@ void CpuActivationKernel::configure(const ITensorInfo *src, ITensorInfo *dst, Ac
     _name = std::string("CpuActivationKernel").append("/").append(uk->name);
 
 #ifdef __aarch64__
-    if (src->data_type() == DataType::QASYMM8 || src->data_type() == DataType::QASYMM8_SIGNED)
+    if ((src->data_type() == DataType::QASYMM8 || src->data_type() == DataType::QASYMM8_SIGNED) &&
+        activation_info.activation() != ActivationFunction::RELU)
     {
         ActivationLayerInfo::LookupTable256 tmp_lut;
         init_lut(activation_info.activation(), src->data_type(), src->quantization_info().uniform(),
diff --git a/src/cpu/kernels/activation/generic/neon/lut.cpp b/src/cpu/kernels/activation/generic/neon/lut.cpp
index f289c80d4b..ddd186f9cb 100644
--- a/src/cpu/kernels/activation/generic/neon/lut.cpp
+++ b/src/cpu/kernels/activation/generic/neon/lut.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,8 +34,9 @@ namespace cpu
 #ifdef __aarch64__
 void neon_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
 {
-    ARM_COMPUTE_ERROR_ON(src->info()->data_type() != DataType::QASYMM8 &&
-                         src->info()->data_type() != DataType::QASYMM8_SIGNED);
+    ARM_COMPUTE_ERROR_ON( // LUT does not provide any performance benefit for ReLU as it's a single max() operation
+        (src->info()->data_type() != DataType::QASYMM8 && src->info()->data_type() != DataType::QASYMM8_SIGNED) ||
+        act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU);
     const auto window_end_x = window.x().end();
     Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
     win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
diff --git a/src/cpu/kernels/activation/generic/sve2/lut.cpp b/src/cpu/kernels/activation/generic/sve2/lut.cpp
index 2ed667debf..5db8595a75 100644
--- a/src/cpu/kernels/activation/generic/sve2/lut.cpp
+++ b/src/cpu/kernels/activation/generic/sve2/lut.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023 Arm Limited.
+ * Copyright (c) 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,8 +34,9 @@ namespace cpu
 #ifdef __aarch64__
 void sve2_q8_activation_lut(const ITensor *src, ITensor *dst, const ActivationLayerInfo &act_info, const Window &window)
 {
-    ARM_COMPUTE_ERROR_ON(src->info()->data_type() != DataType::QASYMM8 &&
-                         src->info()->data_type() != DataType::QASYMM8_SIGNED);
+    ARM_COMPUTE_ERROR_ON( // LUT does not provide any performance benefit for ReLU as it's a single max() operation
+        (src->info()->data_type() != DataType::QASYMM8 && src->info()->data_type() != DataType::QASYMM8_SIGNED) ||
+        act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU);
     const auto window_end_x = window.x().end();
     Window win_collapsed = window.collapse_if_possible(window, Window::DimZ);
     win_collapsed.set(Window::DimX, Window::Dimension(0, 1, 1));
-- 
cgit v1.2.1
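
Why skipping the LUT is safe for quantized ReLU: under an affine quantization
x_f = scale * (x_q - offset) with scale > 0, requantizing max(0.f, x_f) yields
exactly max(x_q, offset), so the dequantize/activate/requantize round trip
collapses to a single integer comparison against the quantization offset. That
is also why init_lut no longer needs a RELU case. The standalone sketch below
illustrates the equivalence over all 256 signed 8-bit inputs; the helper names
and quantization parameters are made up for illustration and are not part of
this patch or of the ComputeLibrary API.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Reference path: dequantize, apply ReLU in float, requantize.
static int8_t relu_via_float(int8_t x, float scale, int32_t zero_point)
{
    const float dequantized = scale * (static_cast<float>(x) - static_cast<float>(zero_point));
    const float activated   = std::max(0.0f, dequantized);
    const long  requantized = std::lround(activated / scale) + zero_point;
    return static_cast<int8_t>(std::clamp<long>(requantized, -128, 127));
}

// Shortcut the commit relies on: ReLU in the quantized domain is one max().
static int8_t relu_direct(int8_t x, int32_t zero_point)
{
    return static_cast<int8_t>(std::max<int32_t>(x, zero_point));
}

int main()
{
    const float   scale      = 0.05f; // made-up QASYMM8_SIGNED parameters
    const int32_t zero_point = -10;
    for (int32_t x = -128; x <= 127; ++x)
    {
        const int8_t q = static_cast<int8_t>(x);
        if (relu_via_float(q, scale, zero_point) != relu_direct(q, zero_point))
        {
            std::printf("mismatch at %d\n", x);
            return 1;
        }
    }
    std::printf("all 256 int8 inputs agree: ReLU == max(x, zero_point)\n");
    return 0;
}

Since the LUT kernels now reject RELU, kernel selection falls through to the
generic quantized activation entries (e.g. sve2_qu8_activation above), which
can implement quantized ReLU as exactly this single max() against the offset.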