diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-05-21 13:32:43 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-03 14:51:29 +0000 |
commit | 4c5469b192665c94118a8a558787cb9cec2d0765 (patch) | |
tree | 168aa969de8243bdbb1f25247dd9f54d037ae32c /src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | |
parent | 43a129e94df41f9ac8bc78b702da5a387ada0494 (diff) | |
download | ComputeLibrary-4c5469b192665c94118a8a558787cb9cec2d0765.tar.gz |
COMPMID-2225: Add interface support for new quantized data types.
Add support for:
- QSYMM8: 8-bit quantized symmetric
- QSYMM8_PER_CHANNEL: 8-bit quantized symmetric with per-channel quantization
Change-Id: I00c4ff98e44af37419470af61419ee95d0de2463
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1236
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp | 32 |
1 file changed, 20 insertions, 12 deletions
diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index 03d516f703..4e518fcfd5 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -115,8 +115,8 @@ void CLGEMMConvolutionLayer::configure_mm(const ICLTensor *input, const ICLTenso const QuantizationInfo input_quantization_info = input->info()->quantization_info(); const QuantizationInfo weights_quantization_info = weights->info()->quantization_info(); - input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input->info()->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights->info()->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); _mm_gemmlowp.configure(input, weights, biases, output, gemm_info); @@ -151,8 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens std::unique_ptr<ITensorInfo> input_qa = input->clone(); std::unique_ptr<ITensorInfo> weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.scale, -input_quantization_info.offset)); - weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.scale, -weights_quantization_info.offset)); + input_qa->set_quantization_info(QuantizationInfo(input_quantization_info.uniform().scale, -input_quantization_info.uniform().offset)); + weights_qa->set_quantization_info(QuantizationInfo(weights_quantization_info.uniform().scale, -weights_quantization_info.uniform().offset)); // Perform validation step on GEMMLowp return CLGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), 
weights_qa.get(), biases, output, gemm_info); @@ -190,6 +190,10 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(idx_width); const unsigned int kernel_height = weights->info()->dimension(idx_height); + const UniformQuantizationInfo iq_info = input->info()->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->info()->quantization_info().uniform(); + _is_prepared = weights_info.retain_internal_weights(); _original_weights = weights; _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -281,9 +285,9 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * // Configure output stage for quantized case if(_is_quantized) { - const QuantizationInfo output_quant_info = (output->info()->total_size() == 0) ? input->info()->quantization_info() : output->info()->quantization_info(); + const auto output_quant_info = (output->info()->total_size() == 0) ? 
iq_info : oq_info; - const float multiplier = (input->info()->quantization_info().scale * weights->info()->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); @@ -298,8 +302,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * if(_is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 
255 : a_const_int; @@ -387,6 +391,10 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI // In case of F16, fused bias will be used in GEMM const bool run_addition = (skip_im2col) && (append_bias) && (data_type != DataType::F16); + const UniformQuantizationInfo iq_info = input->quantization_info().uniform(); + const UniformQuantizationInfo wq_info = weights->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = output->quantization_info().uniform(); + ARM_COMPUTE_RETURN_ERROR_ON((weights->dimension(idx_channel) * num_groups) != input->dimension(idx_channel)); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); @@ -468,9 +476,9 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_quantized) { - const QuantizationInfo output_quant_info = (output->total_size() == 0) ? input->quantization_info() : output->quantization_info(); + const auto output_quant_info = (output->total_size() == 0) ? iq_info : oq_info; - const float multiplier = (input->quantization_info().scale * weights->quantization_info().scale) / output_quant_info.scale; + const float multiplier = (iq_info.scale * wq_info.scale) / output_quant_info.scale; int output_multiplier = 0; int output_shift = 0; @@ -486,8 +494,8 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI if(is_activationlayer_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = output_quant_info.quantize(act_info.a(), RoundingPolicy::TO_NEAREST_UP); - const int b_const_int = output_quant_info.quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); + const int a_const_int = quantize_qasymm8(act_info.a(), output_quant_info); + const int b_const_int = quantize_qasymm8(act_info.b(), output_quant_info); min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? 
output_quant_info.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; |