From a0d1183a15c6788676a12160f56e4c576ee1a84b Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 17 Jan 2018 16:13:46 +0000 Subject: COMPMID-751 QASYMM8 ActivationLayer optimisation: don't requantize if not necessary Change-Id: Iea8a21f7c71025bfde6fdf7c7a7c92ba749b189b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/116673 Reviewed-by: Gian Marco Iodice Tested-by: Jenkins --- src/core/CL/cl_kernels/activation_layer_qa8.cl | 14 ++++++++----- src/core/CL/kernels/CLActivationLayerKernel.cpp | 27 ++++++++++++++----------- 2 files changed, 24 insertions(+), 17 deletions(-) (limited to 'src/core/CL') diff --git a/src/core/CL/cl_kernels/activation_layer_qa8.cl b/src/core/CL/cl_kernels/activation_layer_qa8.cl index 910a93fdc1..02668f7ac1 100644 --- a/src/core/CL/cl_kernels/activation_layer_qa8.cl +++ b/src/core/CL/cl_kernels/activation_layer_qa8.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -89,12 +89,16 @@ __kernel void activation_layer_qa8( // Perform activation data = ACTIVATION_OP(ACT, data); +#if defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) // requantize to output space - float16 fdata = convert_float16(data); - fdata = round((fdata - (float)O1_VAL) * ((float)S1_VAL / (float)S2_VAL) + (float)O2_VAL); - uchar16 qdata = convert_uchar16_sat(fdata); + VEC_DATA_TYPE(float, VEC_SIZE) + fdata = CONVERT(data, VEC_DATA_TYPE(float, VEC_SIZE)); + + fdata = round((fdata - (float)O1_VAL) * ((float)S1_VAL / (float)S2_VAL) + (float)O2_VAL); + data = CONVERT_SAT(fdata, VEC_DATA_TYPE(uchar, VEC_SIZE)); +#endif // defined(O1_VAL) && defined(O2_VAL) && defined(S1_VAL) && defined(S2_VAL) // Store result VSTORE(VEC_SIZE) - (qdata, 0, (__global DATA_TYPE *)output.ptr); + (data, 0, (__global DATA_TYPE *)output.ptr); } diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index eecc94f23c..d85de88ae2 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -145,18 +145,21 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - // Set scale and offset of the input and output - if(is_data_type_quantized_asymmetric(dt)) + // Set scale and offset of the input and output if they have different quantization info + if(is_data_type_quantized_asymmetric(dt) && output != nullptr) { - float s1 = input->info()->quantization_info().scale; - int o1 = input->info()->quantization_info().offset; - // If output is nullptr, assume same quantization scale/offset as input - float s2 = output != nullptr ? output->info()->quantization_info().scale : s1; - int o2 = output != nullptr ? output->info()->quantization_info().offset : o1; - build_opts.emplace(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); - build_opts.emplace(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); - build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); - build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); + const float s1 = input->info()->quantization_info().scale; + const float s2 = output->info()->quantization_info().scale; + const int o1 = input->info()->quantization_info().offset; + const int o2 = output->info()->quantization_info().offset; + + if(o1 != o2 || s1 != s2) + { + build_opts.emplace(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); + build_opts.emplace(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); + build_opts.emplace(("-DO1_VAL=" + support::cpp11::to_string(o1))); + build_opts.emplace(("-DO2_VAL=" + support::cpp11::to_string(o2))); + } } } else -- cgit v1.2.1