diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-05-21 13:32:43 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-03 14:51:29 +0000 |
commit | 4c5469b192665c94118a8a558787cb9cec2d0765 (patch) | |
tree | 168aa969de8243bdbb1f25247dd9f54d037ae32c /src/core | |
parent | 43a129e94df41f9ac8bc78b702da5a387ada0494 (diff) | |
download | ComputeLibrary-4c5469b192665c94118a8a558787cb9cec2d0765.tar.gz |
COMPMID-2225: Add interface support for new quantized data types.
Add support for:
- QSYMM8, 8-bit quantized symmetric
- QSYMM8_PER_CHANNEL, 8-bit quantized symmetric with per-channel quantization
Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
43 files changed, 303 insertions, 242 deletions
diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index d601dfc20d..65e6561b0a 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -122,42 +122,43 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act int a_const_int = 0; int b_const_int = 0; + const bool is_quantized_asymmetric = is_data_type_quantized_asymmetric(dt); // Create quantized version of constants a, b if needed - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { - a_const_int = input->info()->quantization_info().quantize(a_const, RoundingPolicy::TO_NEAREST_UP); - b_const_int = input->info()->quantization_info().quantize(b_const, RoundingPolicy::TO_NEAREST_UP); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + a_const_int = quantize_qasymm8(a_const, iq_info); + b_const_int = quantize_qasymm8(b_const, iq_info); } - const bool is_logistic_activation_quantized = is_data_type_quantized_asymmetric(dt) && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; + const bool is_logistic_activation_quantized = is_quantized_asymmetric && act_info.activation() == ActivationLayerInfo::ActivationFunction::LOGISTIC; // Set build options CLBuildOptions build_opts; build_opts.add_option_if(!is_logistic_activation_quantized, "-DACT=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option(("-DDATA_TYPE=" + get_cl_type_from_data_type(dt))); build_opts.add_option(("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration))); - if(is_data_type_quantized(dt)) + if(is_quantized_asymmetric) { build_opts.add_option(("-DA_VAL=" + support::cpp11::to_string(a_const_int))); build_opts.add_option(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); - const int o1 = input->info()->quantization_info().offset; - const float s1 = 
input->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + // Quantized value of 0 corresponds to the offset o1 - build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(o1))); - build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(s1))); - build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(o1))); + build_opts.add_option(("-DCONST_0=" + support::cpp11::to_string(iq_info.offset))); + build_opts.add_option(("-DS1_VAL=" + float_to_string_with_full_precision(iq_info.scale))); + build_opts.add_option(("-DO1_VAL=" + support::cpp11::to_string(iq_info.offset))); // Set scale and offset of the input and output if they have different quantization info - if(is_data_type_quantized_asymmetric(dt) && output != nullptr) + if(is_quantized_asymmetric && output != nullptr) { - const float s2 = output->info()->quantization_info().scale; - const int o2 = output->info()->quantization_info().offset; + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); - if(o1 != o2 || s1 != s2) + if(iq_info != oq_info) { - build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(s2))); - build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(o2))); + build_opts.add_option(("-DS2_VAL=" + float_to_string_with_full_precision(oq_info.scale))); + build_opts.add_option(("-DO2_VAL=" + support::cpp11::to_string(oq_info.offset))); } } } @@ -171,7 +172,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act // Create kernel std::string kernel_name = std::string("activation_layer"); - if(is_data_type_quantized_asymmetric(dt)) + if(is_quantized_asymmetric) { kernel_name += is_logistic_activation_quantized ? 
std::string("_logistic_qa8") : std::string("_qa8"); } diff --git a/src/core/CL/kernels/CLComparisonKernel.cpp b/src/core/CL/kernels/CLComparisonKernel.cpp index 4f44851ef8..628f9f18e7 100644 --- a/src/core/CL/kernels/CLComparisonKernel.cpp +++ b/src/core/CL/kernels/CLComparisonKernel.cpp @@ -134,10 +134,13 @@ void CLComparisonKernel::configure(const ICLTensor *input1, const ICLTensor *inp build_opts.emplace("-DOP_NAME=" + lower_string(operation_name)); if(is_data_type_quantized_asymmetric(input1->info()->data_type())) { - build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + + build_opts.emplace("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.emplace("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.emplace("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.emplace("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); kernel_name += "_quantized"; } diff --git a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp index 1cae3712dc..5e1bbe944f 100644 --- a/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLDepthConcatenateLayerKernel.cpp @@ -99,10 +99,13 @@ void CLDepthConcatenateLayerKernel::configure(const ICLTensor *input, unsigned i build_opts.add_option("-DVEC_SIZE=" + 
support::cpp11::to_string(num_elems_processed_per_iteration)); if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index cd25bb1e7f..615327a7cc 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -251,30 +251,34 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo 
wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y)); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; 
build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 758e99b77e..e32faa10df 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -213,30 +213,34 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, if(is_qasymm) { - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); + + float multiplier = iq_info.scale * wq_info.scale / oq_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); build_opts.add_option("-DSRC_DIM_1=" + support::cpp11::to_string(_input->info()->dimension(1))); - build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-_input->info()->quantization_info().offset)); - build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-_weights->info()->quantization_info().offset)); - build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(_output->info()->quantization_info().offset)); - 
build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * input->info()->quantization_info().offset * weights->info()->quantization_info().offset)); + build_opts.add_option("-DINPUT_OFFSET=" + support::cpp11::to_string(-iq_info.offset)); + build_opts.add_option("-DWEIGHTS_OFFSET=" + support::cpp11::to_string(-wq_info.offset)); + build_opts.add_option("-DOUTPUT_OFFSET=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DK_OFFSET=" + support::cpp11::to_string(9 * iq_info.offset * wq_info.offset)); build_opts.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); build_opts.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); if(act_info.enabled()) { - const int a_val = output->info()->quantization_info().quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); - const int o1 = output->info()->quantization_info().offset; + const int a_val = quantize_qasymm8(act_info.a(), oq_info); + const int b_val = quantize_qasymm8(act_info.b(), oq_info); + const int o1 = oq_info.offset; build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); - const float s1 = input->info()->quantization_info().scale; + const float s1 = iq_info.scale; build_opts.add_option("-DS1_VAL=" + float_to_string_with_full_precision(s1)); build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 28d4ff2759..0312a57664 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -72,9 +72,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu _input = input; 
_output = output; - const DataLayout data_layout = input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const DataLayout data_layout = input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Create kernel CLBuildOptions build_opts; @@ -96,7 +97,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.add_option("-D" + string_from_data_layout(input->info()->data_layout())); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()), - "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), + "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts.options())); diff --git a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp index 78cc5596dd..0b066837a9 100644 --- a/src/core/CL/kernels/CLDequantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLDequantizationLayerKernel.cpp @@ -95,10 +95,12 @@ void CLDequantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *o } ICLKernel::configure_internal(win); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + 
support::cpp11::to_string(input->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(output->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(output_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 12affa9880..3e158a52ff 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -452,16 +452,20 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL // Set static kernel arguments if(is_data_type_quantized_asymmetric(data_type)) { + const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform(); + int output_multiplier = 0; int output_shift = 0; - float multiplier = _input->info()->quantization_info().scale * _weights->info()->quantization_info().scale / _output->info()->quantization_info().scale; + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? 
num_arguments_per_1D_tensor() : 0) + 1; - _kernel.setArg(idx++, -_input->info()->quantization_info().offset); - _kernel.setArg(idx++, -_weights->info()->quantization_info().offset); - _kernel.setArg(idx++, _output->info()->quantization_info().offset); + _kernel.setArg(idx++, -iqinfo.offset); + _kernel.setArg(idx++, -wqinfo.offset); + _kernel.setArg(idx++, oqinfo.offset); _kernel.setArg(idx++, output_multiplier); _kernel.setArg(idx++, output_shift); } diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 414b040f4c..1d9c71555a 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -134,12 +134,16 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i build_opts.add_option("-DOP=" + operation_string); if(is_data_type_quantized_asymmetric(input1.data_type())) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1.quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2.quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output.quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1.quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2.quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output.quantization_info().scale)); + const UniformQuantizationInfo iq1info = input1.quantization_info().uniform(); + const UniformQuantizationInfo iq2info = input2.quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output.quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + 
support::cpp11::to_string(iq2info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1info.scale)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } return build_opts; } diff --git a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp index 11a4292270..0ff2f1343a 100644 --- a/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -104,9 +104,12 @@ void CLGEMMMatrixVectorMultiplyKernel::configure(const ICLTensor *input0, const // Add static arguments if(is_quantized) { + const UniformQuantizationInfo iq0_info = _input0->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq1_info = _input1->info()->quantization_info().uniform(); + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor() + num_arguments_per_1D_tensor(); - _kernel.setArg<int>(idx++, -_input0->info()->quantization_info().offset); - _kernel.setArg<int>(idx++, -_input1->info()->quantization_info().offset); + _kernel.setArg<int>(idx++, -iq0_info.offset); + _kernel.setArg<int>(idx++, -iq1_info.offset); } // Configure kernel window diff --git a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp index e3f2a96281..4da3e245c0 100644 --- a/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLHeightConcatenateLayerKernel.cpp @@ -133,10 +133,13 @@ void CLHeightConcatenateLayerKernel::configure(const ICLTensor *input, unsigned 
if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 8caa927f8b..10d6e68cd9 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -162,10 +162,11 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size const std::pair<unsigned int, unsigned int> convolved_dims = scaled_dimensions(input_width, input_height, kernel_dims.width, kernel_dims.height, conv_info, dilation); // Im2Col configuration - std::string kernel_name = "im2col_generic_"; - CLBuildOptions build_opts; - unsigned int num_elems_processed_per_iteration = 1; - bool is_padding_required_nchw = false; + std::string kernel_name = "im2col_generic_"; + CLBuildOptions build_opts; + unsigned int 
num_elems_processed_per_iteration = 1; + bool is_padding_required_nchw = false; + const UniformQuantizationInfo qinfo = input->quantization_info().uniform(); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); build_opts.add_option("-DELEMENT_SIZE=" + support::cpp11::to_string(input->element_size())); @@ -185,7 +186,7 @@ Im2ColConfiguration configure_opencl_kernel(const ITensorInfo *input, const Size build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if(num_groups > 1, "-DNUM_GROUPS=" + support::cpp11::to_string(num_groups)); - build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->quantization_info().offset), "-DPAD_VALUE=0"); + build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(qinfo.offset), "-DPAD_VALUE=0"); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); if(data_layout == DataLayout::NHWC) diff --git a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp index 90330163ea..b255ba346f 100644 --- a/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp +++ b/src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp @@ -123,8 +123,9 @@ void CLNormalizePlanarYUVLayerKernel::configure(const ICLTensor *input, ICLTenso std::string kernel_name = "normalize_planar_yuv_layer_"; if(is_data_type_quantized(dt)) { - build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset))); - build_opts.add_option(("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale))); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option(("-DOFFSET=" + support::cpp11::to_string(qinfo.offset))); + build_opts.add_option(("-DSCALE=" + 
support::cpp11::to_string(qinfo.scale))); kernel_name += "q8_"; } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index dda9b16083..050bbb810b 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -181,12 +181,16 @@ void CLPixelWiseMultiplicationKernel::configure(const ICLTensor *input1, const I CLBuildOptions build_opts; if(is_quantized) { - build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(input1->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(input2->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + support::cpp11::to_string(iq1_info.offset)); + build_opts.add_option("-DOFFSET_IN2=" + support::cpp11::to_string(iq2_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + support::cpp11::to_string(iq1_info.scale)); + build_opts.add_option("-DSCALE_IN2=" + support::cpp11::to_string(iq2_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + support::cpp11::to_string(oq_info.scale)); kernel_name += "_quantized"; } 
else diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 7ccbda9be3..8eaf5bf76f 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -205,10 +205,13 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq_info.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq_info.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Check output dimensions diff --git a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp index 374b22eab1..22d4e3345f 100644 --- a/src/core/CL/kernels/CLQuantizationLayerKernel.cpp +++ b/src/core/CL/kernels/CLQuantizationLayerKernel.cpp @@ -93,10 +93,12 @@ void CLQuantizationLayerKernel::configure(const ICLTensor *input, ICLTensor *out } ICLKernel::configure_internal(win); + const 
UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + // Create kernel CLBuildOptions build_opts; - build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(output->info()->quantization_info().offset)); + build_opts.add_option("-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(vec_size_x)); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_option_if(multi_access_x, "-DLAST_ACCESSED_X=" + support::cpp11::to_string(std::max<int>(input_width_x - vec_size_x, 0))); diff --git a/src/core/CL/kernels/CLRangeKernel.cpp b/src/core/CL/kernels/CLRangeKernel.cpp index eb8822b957..a22f5cb4cb 100644 --- a/src/core/CL/kernels/CLRangeKernel.cpp +++ b/src/core/CL/kernels/CLRangeKernel.cpp @@ -116,8 +116,9 @@ void CLRangeKernel::configure(ICLTensor *output, const float start, const float build_opts.add_option("-DSTEP=" + support::cpp11::to_string(step)); if(is_data_type_quantized_asymmetric(output->info()->data_type())) { - build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo qinfo = output->info()->quantization_info().uniform(); + build_opts.add_option("-DOFFSET_OUT=" + support::cpp11::to_string(qinfo.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(qinfo.scale)); kernel_name += "_quantized"; } // Create kernel diff --git a/src/core/CL/kernels/CLScaleKernel.cpp b/src/core/CL/kernels/CLScaleKernel.cpp index cd89d1c6db..488313fd12 100644 --- 
a/src/core/CL/kernels/CLScaleKernel.cpp +++ b/src/core/CL/kernels/CLScaleKernel.cpp @@ -206,8 +206,9 @@ void CLScaleKernel::configure(const ICLTensor *input, ICLTensor *output, Interpo build_opts.add_option_if_else(sampling_policy == SamplingPolicy::CENTER, "-DSAMPLING_POLICY_CENTER", "-DSAMPLING_POLICY_TOP_LEFT"); if(call_quantized_kernel) { - build_opts.add_option("-DSCALE=" + support::cpp11::to_string(input->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(input->info()->quantization_info().offset)); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + build_opts.add_option("-DSCALE=" + support::cpp11::to_string(qinfo.scale)); + build_opts.add_option("-DOFFSET=" + support::cpp11::to_string(qinfo.offset)); } std::string interpolation_name = string_from_interpolation_policy(policy); diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index e2d988103c..a9c08703c0 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -233,15 +233,16 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor _output = output; _sum = sum; - const DataType dt = input->info()->data_type(); - const size_t reduction_dim_size = input->info()->dimension(0); + const DataType dt = input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); + const size_t reduction_dim_size = input->info()->dimension(0); // Set build options CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(dt)); build_opts.add_option_if(dt == DataType::F16, "-DUSE_F16"); build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); - build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), 
prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : @@ -338,9 +339,10 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su ARM_COMPUTE_ERROR_ON_NULLPTR(input, sum, output); // Note: output should always have a scale of 1/256 and offset 0 - const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); - const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); - const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const QuantizationInfo allowed_quantization_info = QuantizationInfo(1.F / 256, 0); + const bool is_quantized_asymmetric = (input->info()->data_type() == DataType::S32); + const DataType output_data_type = is_quantized_asymmetric ? DataType::QASYMM8 : input->info()->data_type(); + const UniformQuantizationInfo qinfo = input->info()->quantization_info().uniform(); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), @@ -357,7 +359,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su CLBuildOptions build_opts; build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.add_options_if(is_quantized_asymmetric, - prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); + prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); // Create kernel std::string kernel_name = is_quantized_asymmetric ? 
"softmax_layer_norm_quantized" : "softmax_layer_norm"; diff --git a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp index 5f266c5ffa..bd4ff2c735 100644 --- a/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate2TensorsKernel.cpp @@ -116,12 +116,16 @@ void CLWidthConcatenate2TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + 
build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp index 54edaafa29..a3ac102564 100644 --- a/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenate4TensorsKernel.cpp @@ -138,16 +138,22 @@ void CLWidthConcatenate4TensorsKernel::configure(const ICLTensor *input1, const const bool have_different_qinfo = helpers::tensor_info::tensors_have_different_quantization_info(output->info(), input1->info(), input2->info(), input3->info(), input4->info()); if(is_data_type_quantized_asymmetric(input1->info()->data_type()) && have_different_qinfo) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input1->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(input2->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(input3->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(input4->info()->quantization_info().scale)); - build_opts.add_option("-DOFFSET_OUT=" + 
float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iq1_info = input1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = input2->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq3_info = input3->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq4_info = input4->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iq1_info.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iq1_info.scale)); + build_opts.add_option("-DOFFSET_IN2=" + float_to_string_with_full_precision(iq2_info.offset)); + build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2_info.scale)); + build_opts.add_option("-DOFFSET_IN3=" + float_to_string_with_full_precision(iq3_info.offset)); + build_opts.add_option("-DSCALE_IN3=" + float_to_string_with_full_precision(iq3_info.scale)); + build_opts.add_option("-DOFFSET_IN4=" + float_to_string_with_full_precision(iq4_info.offset)); + build_opts.add_option("-DSCALE_IN4=" + float_to_string_with_full_precision(iq4_info.scale)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oq_info.offset)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oq_info.scale)); } // Create kernel diff --git a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp index 6c32cd2371..b577944a03 100644 --- a/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp +++ b/src/core/CL/kernels/CLWidthConcatenateLayerKernel.cpp @@ -109,10 +109,13 @@ void CLWidthConcatenateLayerKernel::configure(const ICLTensor 
*input, unsigned i if(is_data_type_quantized_asymmetric(input->info()->data_type()) && input->info()->quantization_info() != output->info()->quantization_info()) { - build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().offset)); - build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().offset)); - build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(input->info()->quantization_info().scale)); - build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(output->info()->quantization_info().scale)); + const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output->info()->quantization_info().uniform(); + + build_opts.add_option("-DOFFSET_IN1=" + float_to_string_with_full_precision(iqinfo.offset)); + build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(oqinfo.offset)); + build_opts.add_option("-DSCALE_IN1=" + float_to_string_with_full_precision(iqinfo.scale)); + build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } // Create kernel diff --git a/src/core/NEON/kernels/NEActivationLayerKernel.cpp b/src/core/NEON/kernels/NEActivationLayerKernel.cpp index bc6a281353..3f71553926 100644 --- a/src/core/NEON/kernels/NEActivationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEActivationLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" @@ -320,15 +319,15 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat Iterator input(_input, win_collapsed); Iterator output(_output, win_collapsed); - const 
QuantizationInfo qi_in = _input->info()->quantization_info(); - const QuantizationInfo qi_out = _output->info()->quantization_info(); - const qasymm8x16_t va = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset)); - const qasymm8x16_t vb = vdupq_n_u8(sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset)); - const qasymm8_t a = sqcvt_qasymm8_f32(_act_info.a(), qi_in.scale, qi_in.offset); - const qasymm8_t b = sqcvt_qasymm8_f32(_act_info.b(), qi_in.scale, qi_in.offset); - const qasymm8_t const_0 = sqcvt_qasymm8_f32(0.f, qi_in.scale, qi_in.offset); - const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); - const auto vconst_1 = vdupq_n_f32(1.f); + const UniformQuantizationInfo qi_in = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo qi_out = _output->info()->quantization_info().uniform(); + const qasymm8x16_t va = vdupq_n_u8(quantize_qasymm8(_act_info.a(), qi_in)); + const qasymm8x16_t vb = vdupq_n_u8(quantize_qasymm8(_act_info.b(), qi_in)); + const qasymm8_t a = quantize_qasymm8(_act_info.a(), qi_in); + const qasymm8_t b = quantize_qasymm8(_act_info.b(), qi_in); + const qasymm8_t const_0 = quantize_qasymm8(0.f, qi_in); + const qasymm8x16_t vconst_0 = vdupq_n_u8(const_0); + const auto vconst_1 = vdupq_n_f32(1.f); // Initialise scale/offset for re-quantization float s = qi_in.scale / qi_out.scale; @@ -415,9 +414,9 @@ typename std::enable_if<std::is_same<T, qasymm8_t>::value, void>::type NEActivat } else if(act == ActivationFunction::LOGISTIC) { - float tmp_f = scvt_f32_qasymm8(in, qi_in.scale, qi_in.offset); + float tmp_f = dequantize_qasymm8(in, qi_in); tmp_f = 1.f / (1.f + std::exp(-tmp_f)); - tmp = sqcvt_qasymm8_f32(tmp_f, qi_out.scale, qi_out.offset); + tmp = quantize_qasymm8(tmp_f, qi_out); } else { diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index ca79a0a419..164026c1ab 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp 
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -165,25 +165,26 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor const auto window_end_x = static_cast<int>(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t voffseto = vdupq_n_f32(output_offset); + const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale); + const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale); + const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset); + const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset); + const float32x4_t voffseto = vdupq_n_f32(oq_info.offset); if(is_broadcast_across_x) { - const bool is_broadcast_input_2 = input2_win.x().step() == 0; - Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; - Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; - const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; - const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? 
in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const bool is_broadcast_input_2 = input2_win.x().step() == 0; + Window broadcast_win = is_broadcast_input_2 ? input2_win : input1_win; + Window non_broadcast_win = !is_broadcast_input_2 ? input2_win : input1_win; + const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; + const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? in2 : in1; + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); // Clear X Dimension on execution window as we handle manually non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1)); @@ -252,7 +253,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor for(; x < window_end_x; ++x) { const float afs = static_cast<int32_t>(*(non_broadcast_input_ptr + x) - non_broadcast_qinfo.offset) * non_broadcast_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, broadcast_input, non_broadcast_input, output); @@ -263,9 +264,6 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -328,9 +326,9 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor // Compute left-over elements for(; x < 
window_end_x; ++x) { - const float afs = static_cast<int32_t>((*(input1_ptr + x)) - input1_qinfo.offset) * input1_qinfo.scale; - const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - input2_qinfo.offset) * input2_qinfo.scale; - *(output_ptr + x) = out->info()->quantization_info().quantize((afs + bfs), RoundingPolicy::TO_NEAREST_UP); + const float afs = static_cast<int32_t>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; + const float bfs = static_cast<int32_t>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; + *(output_ptr + x) = quantize_qasymm8((afs + bfs), out->info()->quantization_info()); } }, input1, input2, output); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 45e1562d8d..8874b52e19 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -87,10 +87,14 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); Iterator output(out, window); + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates &) { - const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info()); - const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info()); + const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), iq1_info); + const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), iq2_info); const float32x4x4_t ta3 
= { @@ -102,7 +106,7 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 } }; - const uint8x16_t result = vquantize(ta3, out->info()->quantization_info()); + const uint8x16_t result = vquantize(ta3, oq_info); vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result); }, diff --git a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp index b360e9e6be..c9c70d6500 100644 --- a/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConcatenateLayerKernel.cpp @@ -53,9 +53,9 @@ void depth_concat(const ITensor *in, ITensor *out, int depth_offset, const Windo Iterator input(in, window); Iterator output(out, window); - const DataType dt = in->info()->data_type(); - const QuantizationInfo &input_qinfo = in->info()->quantization_info(); - const QuantizationInfo &output_qinfo = out->info()->quantization_info(); + const DataType dt = in->info()->data_type(); + const UniformQuantizationInfo input_qinfo = in->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index fdafc2da90..385be04e4a 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -51,8 +51,8 @@ public: static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const Size2D &dilation) { - const int input_offset = -input->info()->quantization_info().offset; - const int weights_offset = 
-weights->info()->quantization_info().offset; + const int input_offset = -input->info()->quantization_info().uniform().offset; + const int weights_offset = -weights->info()->quantization_info().uniform().offset; const int input_stride_x = input->info()->strides_in_bytes().x(); const int input_stride_y = input->info()->strides_in_bytes().y(); diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index 88f8b31a35..53789e2472 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -92,7 +92,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) auto zero = static_cast<T>(0); if(std::is_same<T, uint8_t>::value) { - zero = _input->info()->quantization_info().offset; + zero = _input->info()->quantization_info().uniform().offset; } execute_window_loop(window_out, [&](const Coordinates & id) diff --git a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp index 1520225249..a6dc0977d2 100644 --- a/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDequantizationLayerKernel.cpp @@ -97,7 +97,7 @@ inline void store_result<float16_t>(float16_t *ptr, const float32x4x4_t &v) template <typename T> void run_dequantization(const ITensor *input, ITensor *output, const Window &window) { - const QuantizationInfo &qinfo = input->info()->quantization_info(); + const UniformQuantizationInfo &qinfo = input->info()->quantization_info().uniform(); const int window_step_x = 16; const auto window_start_x = static_cast<int>(window.x().start()); @@ -129,7 +129,7 @@ void run_dequantization(const ITensor *input, ITensor *output, const Window &win for(; x < window_end_x; ++x) { uint8_t val = *(in_ptr + x); - *(out_ptr + x) = static_cast<T>(qinfo.dequantize(val)); + *(out_ptr + x) = static_cast<T>(dequantize_qasymm8(val, qinfo)); } }, in, out); diff --git 
a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index 33457e1fca..0fe05d2044 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -142,9 +142,9 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar } template <ArithmeticOperation op> -inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_arithm_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { - return qinfo.quantize(elementwise_arithm_op_scalar<op>(a, b), RoundingPolicy::TO_NEAREST_UP); + return quantize_qasymm8(elementwise_arithm_op_scalar<op>(a, b), qinfo); } template <ArithmeticOperation op, typename VectorType> @@ -253,7 +253,7 @@ inline uint8_t elementwise_comp_op_scalar(const InputScalarType &a, const InputS } template <ComparisonOperation op> -inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, QuantizationInfo qinfo) +inline uint8_t elementwise_comp_op_quantized_scalar(const float &a, const float &b, UniformQuantizationInfo qinfo) { ARM_COMPUTE_UNUSED(qinfo); return elementwise_comp_op_scalar<op>(a, b); @@ -567,7 +567,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const } void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window, - uint8_t (*scalar_func)(const float &, const float &, QuantizationInfo), + uint8_t (*scalar_func)(const float &, const float &, UniformQuantizationInfo), int (*broadcast_func)(int, int, int, const uint8_t *, float32x4x4_t, uint8_t *, int32x4_t, float32x4_t, float32x4_t, float32x4_t, const bool), int (*neon_func)(int, int, int, const uint8_t *, const uint8_t *, uint8_t *, @@ -587,12 +587,11 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const auto 
window_end_x = static_cast<int>(window.x().end()); const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); - const float output_scale = out->info()->quantization_info().scale; - const int output_offset = out->info()->quantization_info().offset; + const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); // Output quantization info (add 0.5 to round toward the nearest integer - 0.5 rounds away from zero) - const float32x4_t voffseto = vdupq_n_f32(output_offset + 0.5f); - const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_scale); + const float32x4_t voffseto = vdupq_n_f32(output_qinfo.offset + 0.5f); + const float32x4_t invvscaleo = vdupq_n_f32(1.f / output_qinfo.scale); if(is_broadcast_across_x) { @@ -603,8 +602,8 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const ITensor *broadcast_tensor = is_broadcast_input_2 ? in2 : in1; const ITensor *non_broadcast_tensor = !is_broadcast_input_2 ? 
in2 : in1; - const QuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info(); - const QuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info(); + const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform(); + const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform(); const int32x4_t voffset_non_broadcast = vdupq_n_s32(non_broadcast_qinfo.offset); const float32x4_t vscale_non_broadcast = vdupq_n_f32(non_broadcast_qinfo.scale); @@ -628,31 +627,30 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o voffset_non_broadcast, vscale_non_broadcast, voffseto, invvscaleo, !is_broadcast_input_2); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo.scale, non_broadcast_qinfo.offset); - const float bfs = scvt_f32_qasymm8(broadcast_value, broadcast_qinfo.scale, broadcast_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? afs : bfs, - out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(non_broadcast_input_ptr + x), non_broadcast_qinfo); + const float bfs = dequantize_qasymm8(broadcast_value, broadcast_qinfo); + *(output_ptr + x) = (*scalar_func)(!is_broadcast_input_2 ? bfs : afs, !is_broadcast_input_2 ? 
afs : bfs, output_qinfo); } }, broadcast_input, non_broadcast_input, output); } else { + const UniformQuantizationInfo input1_qinfo = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo input2_qinfo = in2->info()->quantization_info().uniform(); + // Input1 quantization info - const int32x4_t voffset1 = vdupq_n_s32(in1->info()->quantization_info().offset); - const float32x4_t vscale1 = vdupq_n_f32(in1->info()->quantization_info().scale); + const int32x4_t voffset1 = vdupq_n_s32(input1_qinfo.offset); + const float32x4_t vscale1 = vdupq_n_f32(input1_qinfo.scale); // Input2 quantization info - const int32x4_t voffset2 = vdupq_n_s32(in2->info()->quantization_info().offset); - const float32x4_t vscale2 = vdupq_n_f32(in2->info()->quantization_info().scale); + const int32x4_t voffset2 = vdupq_n_s32(input2_qinfo.offset); + const float32x4_t vscale2 = vdupq_n_f32(input2_qinfo.scale); // Clear X Dimension on execution window as we handle manually input1_win.set(Window::DimX, Window::Dimension(0, 1, 1)); input2_win.set(Window::DimX, Window::Dimension(0, 1, 1)); - const QuantizationInfo input1_qinfo = in1->info()->quantization_info(); - const QuantizationInfo input2_qinfo = in2->info()->quantization_info(); - Iterator input1(in1, input1_win); Iterator input2(in2, input2_win); Iterator output(out, win); @@ -667,9 +665,9 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o vscale1, vscale2, voffseto, invvscaleo); for(; x < window_end_x; ++x) { - const float afs = scvt_f32_qasymm8(*(input1_ptr + x), input1_qinfo.scale, input1_qinfo.offset); - const float bfs = scvt_f32_qasymm8(*(input2_ptr + x), input2_qinfo.scale, input2_qinfo.offset); - *(output_ptr + x) = (*scalar_func)(afs, bfs, out->info()->quantization_info()); + const float afs = dequantize_qasymm8(*(input1_ptr + x), input1_qinfo); + const float bfs = dequantize_qasymm8(*(input2_ptr + x), input2_qinfo); + *(output_ptr + x) = (*scalar_func)(afs, bfs, output_qinfo); } }, 
input1, input2, output); diff --git a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp index e699bac556..d45e3ce56a 100644 --- a/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp +++ b/src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp @@ -27,12 +27,9 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Utils.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/Window.h" -#include "arm_compute/core/Window.h" #include "support/ToolchainSupport.h" diff --git a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp index cba3390641..0e77ead72b 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixVectorMultiplyKernel.cpp @@ -179,8 +179,8 @@ void NEGEMMMatrixVectorMultiplyKernel::matrix_vector_multiply<uint8_t, uint8_t, Iterator in2(_input1, window_w); Iterator out(_output, window_out); - const int input_offset = -_input0->info()->quantization_info().offset; - const int weights_offset = -_input1->info()->quantization_info().offset; + const int input_offset = -_input0->info()->quantization_info().uniform().offset; + const int weights_offset = -_input1->info()->quantization_info().uniform().offset; const int input_w = _input0->info()->dimension(0); const int input_h = _input0->info()->dimension(1); diff --git a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp index b8e204cfd8..8efab7da33 100644 --- a/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEHeightConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEHeightConcatenateLayerKernel::run(const Window &window, const ThreadInfo 
uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _height_offset * _output->info()->strides_in_bytes()[Window::DimY]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 34af0cf3fd..874259bbb7 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -279,7 +279,7 @@ void NEIm2ColKernel::run_im2col(const Window &window) const int pad_top = _conv_info.pad_top(); const int stride_x = _conv_info.stride().first; const int stride_y = _conv_info.stride().second; - const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? _input->info()->quantization_info().offset : 0; + const int pad_value = is_data_type_quantized(_input->info()->data_type()) ? 
_input->info()->quantization_info().uniform().offset : 0; Window window_in_out(window); // The first three dimensions of the input and output are increased by the inner loops diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index fa16484cd3..c313b23ad3 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -174,7 +174,7 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in) } void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale, - const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info) + const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info, const UniformQuantizationInfo &output_qua_info) { const auto input1 = static_cast<const qasymm8_t *__restrict>(input1_ptr); const auto input2 = static_cast<const qasymm8_t *__restrict>(input2_ptr); @@ -187,7 +187,7 @@ void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, c const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info); const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info); - const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset); + const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset }; const float32x4x4_t out_f32x4x4 = { @@ -660,7 +660,7 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo execute_window_loop(collapsed, [&](const Coordinates &) { (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale, - _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info()); + 
_input1->info()->quantization_info().uniform(), _input2->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform()); collapsed.slide_window_slice_3D(slice_input1); collapsed.slide_window_slice_3D(slice_input2); }, diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index ac2ffa1988..62c9ca0d5e 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -562,6 +562,10 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con const int scale_step_x = (pool_stride_x == 1) ? 2 : 1; + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); + const bool have_different_qinfo = input_qinfo != output_qinfo; + execute_window_loop(window, [&](const Coordinates & id) { const auto top_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset())); @@ -640,9 +644,7 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con } } - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - if(input_qinfo != output_qinfo) + if(have_different_qinfo) { const auto requantized_output = vquantize(vdequantize(vcombine_u8(lower_res, upper_res), input_qinfo), output_qinfo); lower_res = vget_low_u8(requantized_output); @@ -814,8 +816,8 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 
0 : pool_pad_bottom); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top))); const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1)); @@ -1598,6 +1600,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); + execute_window_loop(window, [&](const Coordinates & id) { uint8_t res = 0; @@ -1671,11 +1676,7 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nchw(const Window &window_input, c } // Store result - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); - res = (input_qinfo != output_qinfo) ? sqcvt_qasymm8_f32(scvt_f32_qasymm8(res, input_qinfo.scale, input_qinfo.offset), output_qinfo.scale, - output_qinfo.offset) : - res; + res = (input_qinfo != output_qinfo) ? 
quantize_qasymm8(dequantize_qasymm8(res, input_qinfo), output_qinfo) : res; *(reinterpret_cast<uint8_t *>(output.ptr())) = res; }, input, output); @@ -1698,9 +1699,9 @@ void NEPoolingLayerKernel::poolingMxN_qasymm8_nhwc(const Window &window_input, c const int upper_bound_w = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = _input->info()->dimension(2) + (exclude_padding ? 0 : pool_pad_bottom); - const float32x4_t half_scale_v = vdupq_n_f32(0.5f); - const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + const float32x4_t half_scale_v = vdupq_n_f32(0.5f); + const UniformQuantizationInfo input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo output_qinfo = _output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { diff --git a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp index 4deeb1c7cc..0aa34cd411 100644 --- a/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp +++ b/src/core/NEON/kernels/NEQuantizationLayerKernel.cpp @@ -107,6 +107,7 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio const auto window_start_x = static_cast<int>(window.x().start()); const auto window_end_x = static_cast<int>(window.x().end()); + const UniformQuantizationInfo uqinfo = qinfo.uniform(); #ifdef __aarch64__ constexpr RoundingPolicy rounding_policy = RoundingPolicy::TO_NEAREST_EVEN; #else //__aarch64__ @@ -127,12 +128,12 @@ void NEQuantizationLayerKernel::quantize(const Window &window, const Quantizatio int x = window_start_x; for(; x <= (window_end_x - window_step); x += window_step) { - wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), qinfo)); + wrapper::vstore(&output_ptr[x], vquantize(load_value(&input_ptr[x]), uqinfo)); } // Compute left-over 
elements for(; x < window_end_x; ++x) { - output_ptr[x] = qinfo.quantize(input_ptr[x], rounding_policy); + output_ptr[x] = quantize_qasymm8(input_ptr[x], uqinfo, rounding_policy); } }, input, output); diff --git a/src/core/NEON/kernels/NEReductionOperationKernel.cpp b/src/core/NEON/kernels/NEReductionOperationKernel.cpp index c6e853659c..1bfef27d49 100644 --- a/src/core/NEON/kernels/NEReductionOperationKernel.cpp +++ b/src/core/NEON/kernels/NEReductionOperationKernel.cpp @@ -542,6 +542,9 @@ struct RedOpX_qasymm8 inline void operator()(Iterator &input, Iterator &output, Window &in_slice, Window &out_slice, const TensorInfo &in_info, const ReductionOperation op) { ARM_COMPUTE_UNUSED(out_slice); + + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + auto vec_res_value1 = vdupq_n_u32(static_cast<uint32_t>(0.f)); auto vec_res_value2 = vdupq_n_u32(static_cast<uint32_t>(0.f)); auto vec_res_value3 = vdupq_n_u32(static_cast<uint32_t>(0.f)); @@ -584,8 +587,8 @@ struct RedOpX_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -673,7 +676,7 @@ struct RedOpX_qasymm8 res *= wrapper::vgetlane(carry_res, 3); //re-quantize result - res = sqcvt_qasymm8_f32(res, in_info.quantization_info().scale, in_info.quantization_info().offset); + res = quantize_qasymm8(res, iq_info); *(output.ptr()) = static_cast<uint8_t>(res); break; } @@ -877,6 +880,8 @@ struct RedOpYZW_qasymm8 { ARM_COMPUTE_UNUSED(out_slice); + const UniformQuantizationInfo iq_info = in_info.quantization_info().uniform(); + execute_window_loop(in_slice, [&](const Coordinates &) { uint32x4x4_t vec_res_idx{ { 0 } }; 
@@ -932,8 +937,8 @@ struct RedOpYZW_qasymm8 } case ReductionOperation::PROD: { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto scale32x4f_4 = vdupq_n_f32(in_info.quantization_info().scale); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto scale32x4f_4 = vdupq_n_f32(iq_info.scale); const auto temp16x8t_1 = vmovl_u8(vget_low_u8(vec_elements)); const auto temp16x8t_2 = vmovl_u8(vget_high_u8(vec_elements)); @@ -1004,8 +1009,8 @@ struct RedOpYZW_qasymm8 } else if(op == ReductionOperation::PROD) { - const auto offset32x4f_4 = vdupq_n_f32(in_info.quantization_info().offset); - const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(in_info.quantization_info().scale)); + const auto offset32x4f_4 = vdupq_n_f32(iq_info.offset); + const auto iscale32x4f_4 = vinvq_f32(vdupq_n_f32(iq_info.scale)); //re-quantize vec_res_value1_f = vaddq_f32(vmulq_f32(vec_res_value1_f, iscale32x4f_4), offset32x4f_4); diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 36398cf89a..99328deecd 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -31,7 +31,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/wrapper/wrapper.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" diff --git a/src/core/NEON/kernels/NEScaleKernel.cpp b/src/core/NEON/kernels/NEScaleKernel.cpp index 003f472486..e99b97bbe5 100644 --- a/src/core/NEON/kernels/NEScaleKernel.cpp +++ b/src/core/NEON/kernels/NEScaleKernel.cpp @@ -218,7 +218,7 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset const int input_height = input->info()->dimension(2); T border_value; - if(use_padding && border_mode != BorderMode::REPLICATE ) + if(use_padding && border_mode != 
BorderMode::REPLICATE) { // configure() sets top border to 0 for BorderMode::REPLICATE and border_value is not needed in execute_window_loop() for REPLICATE border_value = *reinterpret_cast<T *>(input->buffer() + input->info()->offset_first_element_in_bytes() - stride_w); @@ -235,9 +235,9 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset int border_size = (border_mode == BorderMode::UNDEFINED) ? 0 : 1; - const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = input->info()->quantization_info(); - const QuantizationInfo oq_info = output->info()->quantization_info(); + const bool is_quantized = (input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); execute_window_loop(window, [&](const Coordinates & id) { @@ -295,11 +295,11 @@ inline void scale_bilinear_nhwc_core(const ITensor *input, const ITensor *offset //dequantize quantized input if(is_quantized) { - float inp00 = iq_info.dequantize(a00); - float inp01 = iq_info.dequantize(a01); - float inp10 = iq_info.dequantize(a10); - float inp11 = iq_info.dequantize(a11); - res = static_cast<T>(oq_info.quantize((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), RoundingPolicy::TO_NEAREST_UP)); + float inp00 = dequantize_qasymm8(a00, iq_info); + float inp01 = dequantize_qasymm8(a01, iq_info); + float inp10 = dequantize_qasymm8(a10, iq_info); + float inp11 = dequantize_qasymm8(a11, iq_info); + res = static_cast<T>(quantize_qasymm8((inp00 * w1 + inp01 * w2 + inp10 * w3 + inp11 * w4), oq_info)); } else { @@ -651,9 +651,9 @@ void NEScaleKernel::scale_bilinear_nchw(const Window &window) const size_t in_stide_in_bytes = _input->info()->strides_in_bytes()[1]; const size_t in_stride = in_stide_in_bytes / _input->info()->element_size(); - const bool is_quantized = 
(_input->info()->data_type() == DataType::QASYMM8); - const QuantizationInfo iq_info = _input->info()->quantization_info(); - const QuantizationInfo oq_info = _output->info()->quantization_info(); + const bool is_quantized = (_input->info()->data_type() == DataType::QASYMM8); + const UniformQuantizationInfo iq_info = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = _output->info()->quantization_info().uniform(); switch(_input->info()->data_type()) { diff --git a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp index e9417ece44..4144a1877b 100644 --- a/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp +++ b/src/core/NEON/kernels/NESoftmaxLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -595,7 +595,7 @@ void logits_1d_softmax_qasymm8(const ITensor &in, const ITensor &max, void *cons const int start_x = in.info()->valid_region().anchor.x(); const int input_width = in.info()->valid_region().shape.x(); - const float scale_beta = -beta * in.info()->quantization_info().scale; + const float scale_beta = -beta * in.info()->quantization_info().uniform().scale; Iterator in_it(&in, window); Iterator max_it(&max, window); diff --git a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp index aea6875f20..28f655c529 100644 --- a/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWidthConcatenateLayerKernel.cpp @@ -112,11 +112,11 @@ void NEWidthConcatenateLayerKernel::run(const Window &window, const ThreadInfo & uint8_t *output_ptr = _output->buffer() + _output->info()->offset_first_element_in_bytes() + _width_offset * _output->info()->strides_in_bytes()[0]; // Create iterators - Iterator input(_input, window); - Iterator output(_output, window); - const DataType dt = _input->info()->data_type(); - 
const QuantizationInfo &input_qinfo = _input->info()->quantization_info(); - const QuantizationInfo &output_qinfo = _output->info()->quantization_info(); + Iterator input(_input, window); + Iterator output(_output, window); + const DataType dt = _input->info()->data_type(); + const UniformQuantizationInfo &input_qinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo &output_qinfo = _output->info()->quantization_info().uniform(); if(dt == DataType::QASYMM8 && input_qinfo != output_qinfo) { execute_window_loop(window, [&](const Coordinates &) diff --git a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp index 09a4a11b66..383c2b8b99 100644 --- a/src/core/NEON/kernels/NEYOLOLayerKernel.cpp +++ b/src/core/NEON/kernels/NEYOLOLayerKernel.cpp @@ -30,7 +30,6 @@ #include "arm_compute/core/NEON/NEFixedPoint.h" #include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/NEON/kernels/detail/NEActivationFunctionDetail.h" -#include "arm_compute/core/QAsymm8.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" |