From 14cbfb2921990d8bf125231e350e2ac8dcd95a8b Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 23 Oct 2019 10:53:10 +0100 Subject: COMPMID-2609: Enable quantization with multiplier greater than 1 on OpenCL Change-Id: I050f1f84e214e61f7cbb0197a672b68a4940edae Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2158 Comments-Addressed: Arm Jenkins Reviewed-by: Manuel Bottini Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena --- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 74 +++++++++++++--------- 1 file changed, 45 insertions(+), 29 deletions(-) (limited to 'src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp') diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 7b74a5a98c..e61e5c3901 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -38,8 +38,8 @@ #include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "support/ToolchainSupport.h" -using namespace arm_compute; - +namespace arm_compute +{ namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info) @@ -69,7 +69,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { const auto supported_data_layout = is_data_type_quantized(data_type) ? DataLayout::NCHW : DataLayout::NHWC; const auto error_message = std::string("Only " + string_from_data_layout(supported_data_layout) + " layout is supported for 9x9 convolution with " + string_from_data_type( - data_type) + " type"); + data_type) + + " type"); ARM_COMPUTE_RETURN_ERROR_ON_MSG((supported_data_layout != data_layout), error_message.c_str()); } @@ -98,6 +99,17 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); } + if(is_data_type_quantized(data_type)) + { + const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = output->quantization_info().uniform(); + + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; + int output_multiplier = 0; + int output_shift = 0; + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); + } return Status{}; } @@ -483,8 +495,6 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL } else { - const bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type); - build_options.add_option_if(is_quantized_asymm, std::string("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size))); build_options.add_option(std::string("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type))); build_options.add_option(std::string("-DDATA_SIZE=" + get_data_size_from_data_type(data_type))); build_options.add_option(std::string("-DWEIGHTS_DEPTH=" + support::cpp11::to_string(_weights->info()->dimension(channel_idx)))); @@ -508,9 +518,35 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL } } build_options.add_option(std::string("-DDATA_TYPE_PROMOTED=" + get_cl_type_from_data_type(data_type))); - // Create kernel - _kernel = static_cast(CLKernelLibrary::get().create_kernel(is_quantized_asymm ? "direct_convolution_quantized" : kernel_name.str(), - build_options.options())); + + if(is_data_type_quantized(data_type)) + { + const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform(); + + float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; + int output_multiplier = 0; + int output_shift = 0; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + build_options.add_option("-DOUTPUT_MULTIPLIER=" + support::cpp11::to_string(output_multiplier)); + build_options.add_option("-DOUTPUT_SHIFT=" + support::cpp11::to_string(output_shift)); + build_options.add_option("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size)); + + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel("direct_convolution_quantized", build_options.options())); + + // Set static kernel arguments + unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1; + _kernel.setArg(idx++, -iqinfo.offset); + _kernel.setArg(idx++, -wqinfo.offset); + _kernel.setArg(idx++, oqinfo.offset); + } + else + { + // Create kernel + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name.str(), build_options.options())); + } } // Configure kernel window @@ -518,27 +554,6 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL ARM_COMPUTE_ERROR_THROW_ON(win_config.first); ICLKernel::configure_internal(win_config.second); - // Set static kernel arguments - if(is_data_type_quantized_asymmetric(data_type)) - { - const UniformQuantizationInfo iqinfo = _input->info()->quantization_info().uniform(); - const UniformQuantizationInfo wqinfo = _weights->info()->quantization_info().uniform(); - const UniformQuantizationInfo oqinfo = _output->info()->quantization_info().uniform(); - - int output_multiplier = 0; - int output_shift = 0; - - float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; - ARM_COMPUTE_THROW_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); - - unsigned int idx = 3 * num_arguments_per_3D_tensor() + ((_biases != nullptr) ? num_arguments_per_1D_tensor() : 0) + 1; - _kernel.setArg(idx++, -iqinfo.offset); - _kernel.setArg(idx++, -wqinfo.offset); - _kernel.setArg(idx++, oqinfo.offset); - _kernel.setArg(idx++, output_multiplier); - _kernel.setArg(idx++, output_shift); - } - // Set config_id for enabling LWS tuning _config_id = "direct_convolution_"; _config_id += lower_string(string_from_data_type(data_type)); @@ -614,3 +629,4 @@ void CLDirectConvolutionLayerKernel::run(const Window &window, cl::CommandQueue } while(window.slide_window_slice_3D(slice) && win_in.slide_window_slice_3D(slice_in)); } +} // namespace arm_compute -- cgit v1.2.1