diff options
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp | 88 |
1 files changed, 40 insertions, 48 deletions
diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index f4377cdaf2..caff117e09 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -59,7 +59,7 @@ void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const I Status NEConvolutionLayerReshapeWeights::validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(weights); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(weights->num_dimensions() > 4); if(biases != nullptr) @@ -114,18 +114,18 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const UniformQuantizationInfo iqinfo = input->info()->quantization_info().uniform(); - const UniformQuantizationInfo wqinfo = weights->info()->quantization_info().uniform(); - - input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); - weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - - const UniformQuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info().uniform(); - - float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; - int output_multiplier; - int output_shift; - quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); + const QuantizationInfo iqinfo = input->info()->quantization_info(); + const QuantizationInfo wqinfo = weights->info()->quantization_info(); + const QuantizationInfo oqinfo = (output->info()->total_size() == 0) ? iqinfo : output->info()->quantization_info(); + const UniformQuantizationInfo uiqinfo = iqinfo.uniform(); + const UniformQuantizationInfo uoqinfo = oqinfo.uniform(); + + input->info()->set_quantization_info(QuantizationInfo(uiqinfo.scale, -uiqinfo.offset)); + if(!is_data_type_quantized_per_channel(weights->info()->data_type())) + { + const UniformQuantizationInfo uwqinfo = wqinfo.uniform(); + weights->info()->set_quantization_info(QuantizationInfo(uwqinfo.scale, -uwqinfo.offset)); + } // Merge activation with output stage int min_activation = 0; @@ -133,26 +133,25 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w if(supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); - const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); + const int a_const_int = quantize_qasymm8(act_info.a(), uoqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), uoqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? uoqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; } GEMMLowpOutputStageInfo output_info; - output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = oqinfo.offset; - output_info.gemmlowp_multiplier = output_multiplier; - output_info.gemmlowp_shift = output_shift; - output_info.gemmlowp_min_bound = min_activation; - output_info.gemmlowp_max_bound = max_activation; + output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + output_info.gemmlowp_offset = uoqinfo.offset; + output_info.gemmlowp_min_bound = min_activation; + output_info.gemmlowp_max_bound = max_activation; + quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info); _mm_gemmlowp.configure(input, weights, biases, output, GEMMInfo(false, false, true, gemm_3d_depth, _skip_im2col, false, output_info)); // Revert back QuantizatioInfo as input and weights could be used in other convolution layers - input->info()->set_quantization_info(QuantizationInfo(iqinfo.scale, iqinfo.offset)); - weights->info()->set_quantization_info(QuantizationInfo(wqinfo.scale, wqinfo.offset)); + input->info()->set_quantization_info(iqinfo); + weights->info()->set_quantization_info(wqinfo); } else { @@ -176,20 +175,10 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens { // Since we need negative offsets for computing convolution, we need to change QuantizationInfo() // Extract and negate input and weights offset - const UniformQuantizationInfo iqinfo = input->quantization_info().uniform(); - const UniformQuantizationInfo wqinfo = weights->quantization_info().uniform(); - - std::unique_ptr<ITensorInfo> input_qa = input->clone(); - std::unique_ptr<ITensorInfo> weights_qa = weights->clone(); - input_qa->set_quantization_info(QuantizationInfo(iqinfo.scale, -iqinfo.offset)); - weights_qa->set_quantization_info(QuantizationInfo(wqinfo.scale, -wqinfo.offset)); - - const UniformQuantizationInfo oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info().uniform(); - - float multiplier = iqinfo.scale * wqinfo.scale / oqinfo.scale; - int output_multiplier; - int output_shift; - ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift)); + const QuantizationInfo &iqinfo = input->quantization_info(); + const QuantizationInfo &wqinfo = weights->quantization_info(); + const QuantizationInfo &oqinfo = (output->total_size() == 0) ? iqinfo : output->quantization_info(); + const UniformQuantizationInfo uoqinfo = oqinfo.uniform(); // Merge activation with output stage int min_activation = 0; @@ -201,22 +190,25 @@ Status NEGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens }; if(is_activation_enabled && supported_acts.count(act_info.activation()) != 0) { - const int a_const_int = quantize_qasymm8(act_info.a(), oqinfo); - const int b_const_int = quantize_qasymm8(act_info.b(), oqinfo); + const int a_const_int = quantize_qasymm8(act_info.a(), uoqinfo); + const int b_const_int = quantize_qasymm8(act_info.b(), uoqinfo); - min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? oqinfo.offset : b_const_int; + min_activation = act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU ? uoqinfo.offset : b_const_int; max_activation = act_info.activation() == ActivationLayerInfo::ActivationFunction::RELU ? 255 : a_const_int; } GEMMLowpOutputStageInfo output_info; - output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; - output_info.gemmlowp_offset = oqinfo.offset; - output_info.gemmlowp_multiplier = output_multiplier; - output_info.gemmlowp_shift = output_shift; - output_info.gemmlowp_min_bound = min_activation; - output_info.gemmlowp_max_bound = max_activation; + output_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + output_info.gemmlowp_offset = uoqinfo.offset; + output_info.gemmlowp_min_bound = min_activation; + output_info.gemmlowp_max_bound = max_activation; + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multipliers_less_than_one(iqinfo, wqinfo, oqinfo, output_info)); // Perform validation step on GEMMLowp + std::unique_ptr<ITensorInfo> input_qa = input->clone(); + std::unique_ptr<ITensorInfo> weights_qa = weights->clone(); + input_qa->set_quantization_info(QuantizationInfo(iqinfo.uniform().scale, -iqinfo.uniform().offset)); + weights_qa->set_quantization_info(QuantizationInfo(wqinfo.uniform().scale, -wqinfo.uniform().offset)); return NEGEMMLowpMatrixMultiplyCore::validate(input_qa.get(), weights_qa.get(), biases, output, GEMMInfo(false, false, true, gemm_3d_depth, skip_im2col, false, output_info)); } else @@ -396,7 +388,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QASYMM8, DataType::QSYMM8_PER_CHANNEL, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); ARM_COMPUTE_RETURN_ERROR_ON_MSG(num_groups > 1, "Grouping (num_groups != 1) is not supported on NEON"); |