From 951b8a4c01de2810349b6f16cf9bbba7578484fa Mon Sep 17 00:00:00 2001 From: Vidhya Sudhan Loganathan Date: Mon, 4 Nov 2019 14:42:08 +0000 Subject: COMPMID-2309 : CLConvolutionLayer: support QUANT8_SYMM_PER_CHANNEL filters Change-Id: I16f6758b768ede404a064db057302ded706e1e8a Signed-off-by: Vidhya Sudhan Loganathan Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/2215 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- tests/validation/fixtures/GEMMLowpFixture.h | 122 ++++++++++++++++++++++------ 1 file changed, 96 insertions(+), 26 deletions(-) (limited to 'tests/validation/fixtures/GEMMLowpFixture.h') diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 8385221c78..5d092ecac2 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -47,23 +48,66 @@ namespace template void fill(U &&tensor, int i) { - // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path - std::uniform_int_distribution<> distribution(1, 254); - library->fill(tensor, distribution, i); + switch(tensor.data_type()) + { + case DataType::QSYMM8_PER_CHANNEL: + { + int min_bound = 128; + int max_bound = -127; + for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++) + { + std::pair bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i); + if(bounds.first < min_bound) + { + min_bound = bounds.first; + } + if(bounds.second > max_bound) + { + max_bound = bounds.second; + } + } + std::uniform_int_distribution distribution(min_bound, max_bound); + library->fill(tensor, distribution, i); + break; + } + case DataType::QASYMM8: + { + std::uniform_int_distribution distribution(1, 254); + library->fill(tensor, distribution, i); + break; + } + case DataType::F16: + case DataType::F32: + { + // Between 1 and 254 in order to avoid having -128 and 128 for the DOT product path + std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + library->fill(tensor, distribution, i); + break; + } + default: + library->fill_tensor_uniform(tensor, i); + } } template TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo()) + GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo()) { // Create tensors TensorType a = create_tensor(shape_a, DataType::QASYMM8, 1); - TensorType b = create_tensor(shape_b, DataType::QASYMM8, 1); + TensorType b = create_tensor(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated TensorType output = create_tensor(shape_output, output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : DataType::QASYMM8, 1); a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset)); - b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); + if(data_type_b == DataType::QSYMM8_PER_CHANNEL) + { + b.info()->set_quantization_info(b_qinfo); + } + else + { + b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset)); + } TensorType bias; if(is_fused) { @@ -101,14 +145,14 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape ARM_COMPUTE_EXPECT(!bias.info()->is_resizable(), framework::LogLevel::ERRORS); fill(AccessorType(bias), 2); } - // Compute GEMM function gemmlowp.run(); return output; } -template -SimpleTensor compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) +template +SimpleTensor compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, + DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo()) { TensorShape shape_a_to_use = shape_a; if(reinterpret_input_as_3d) @@ -119,13 +163,12 @@ SimpleTensor compute_gemmlowp_reference(const TensorShape &shape_a, con // Create reference SimpleTensor a{ shape_a_to_use, DataType::QASYMM8, 1 }; - SimpleTensor b{ shape_b, DataType::QASYMM8, 1 }; + SimpleTensor b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) }; // Fill reference fill(a, 0); fill(b, 1); - - return reference::gemmlowp_matrix_multiply_core(a, b, shape_output, a_offset, b_offset); + return reference::gemmlowp_matrix_multiply_core(a, b, shape_output, a_offset, b_offset); } } @@ -155,29 +198,50 @@ protected: SimpleTensor _reference{}; }; -template +template class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture { public: template - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b) { ARM_COMPUTE_EXPECT(output_stage.type != GEMMLowpOutputStageType::NONE, framework::LogLevel::ERRORS); - _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage); - _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage); + if(data_type_b == DataType::QSYMM8_PER_CHANNEL) + { + output_stage.is_quantized_per_channel = true; + const size_t num_channels = shape_b[0]; + std::vector scales(num_channels); + std::uniform_real_distribution<> distribution(0, 1); + library->fill(scales, distribution, 0); + output_stage.gemmlowp_multipliers.resize(num_channels); + output_stage.gemmlowp_shifts.resize(num_channels); + for(size_t i = 0; i < num_channels; ++i) + { + quantization::calculate_quantized_multiplier_less_than_one(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]); + } + + _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales)); + _target = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_b, QuantizationInfo(scales)); + } + else + { + _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo()); + _target = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_b, QuantizationInfo()); + } } protected: - TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, + DataType data_type_b, QuantizationInfo b_qinfo) { return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, b_offset, - output_stage); + output_stage, data_type_b, b_qinfo); } SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, - GEMMLowpOutputStageInfo output_stage) + GEMMLowpOutputStageInfo output_stage, DataType data_type_b, QuantizationInfo b_qinfo) { - SimpleTensor output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset); + SimpleTensor output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_b, b_qinfo); TensorShape bias_shape(shape_b[0]); SimpleTensor bias{ bias_shape, DataType::S32, 1 }; @@ -187,11 +251,11 @@ protected: { case GEMMLowpOutputStageType::QUANTIZE_DOWN: return reference::gemmlowp_quantize_down_int32_to_uint8_scale(output, bias, - output_stage.gemmlowp_offset, output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); + output_stage.gemmlowp_offset, output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; case GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT: return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(output, bias, - output_stage.gemmlowp_multiplier, output_stage.gemmlowp_shift, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); + output_stage.gemmlowp_multipliers, output_stage.gemmlowp_shifts, output_stage.gemmlowp_offset, output_stage.gemmlowp_min_bound, output_stage.gemmlowp_max_bound); break; default: ARM_COMPUTE_ERROR("Not Supported!"); @@ -276,16 +340,19 @@ protected: // Fill reference fill(a, 0); + const std::vector result_mult_int_vec = { result_mult_int }; + const std::vector result_shift_vec = { result_shift }; + if(add_bias) { // Fill bias fill(b, 1); - return reference::gemmlowp_quantize_down_int32_to_uint8_scale(a, b, result_offset, result_mult_int, result_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale(a, b, result_offset, result_mult_int_vec, result_shift_vec, min, max); } else { - return reference::gemmlowp_quantize_down_int32_to_uint8_scale(a, result_offset, result_mult_int, result_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale(a, result_offset, result_mult_int_vec, result_shift_vec, min, max); } } @@ -368,16 +435,19 @@ protected: // Fill reference fill(a, 0); + const std::vector result_fixed_point_multiplier_vec = { result_fixed_point_multiplier }; + const std::vector result_shift_vec = { result_shift }; + if(add_bias) { // Fill bias fill(b, 1); - return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(a, b, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(a, b, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max); } else { - return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(a, result_fixed_point_multiplier, result_shift, result_offset_after_shift, min, max); + return reference::gemmlowp_quantize_down_int32_to_uint8_scale_by_fixedpoint(a, result_fixed_point_multiplier_vec, result_shift_vec, result_offset_after_shift, min, max); } } -- cgit v1.2.1