From 02c452fe1ec17c3941272a07b5cae1f32d614c56 Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi
Date: Thu, 26 Oct 2023 00:14:36 +0100
Subject: Add Dynamic Quantization tests to Fully Connected Layer

This patch calculates the output quantization info based on the inputs'
quantization information. The previous approach used the same quantization
information for the input, weights and output.

This implementation does not cover cases with a fused activation function.

Resolves: [COMPMID-6484]

Signed-off-by: Mohammed Suhail Munshi
Change-Id: Ib58143165191e82ae8547e661ac7c8d077bda200
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10539
Comments-Addressed: Arm Jenkins
Reviewed-by: SiCong Li
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../fixtures/FullyConnectedLayerFixture.h | 205 ++++++++++++++++-----
 1 file changed, 164 insertions(+), 41 deletions(-)

diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index 7cfe6e49b9..05f20ac12b 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -55,6 +55,40 @@ public:
     using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
 
 public:
+    void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+    {
+        _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+        const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+        const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+        std::mt19937                           generator(library->seed() + _hash);
+        std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+        std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+        const float   scale_lhs  = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const float   scale_rhs  = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const int32_t offset_lhs = distribution_t(generator);
+        const int32_t offset_rhs = distribution_t(generator);
+
+        input_q_info   = QuantizationInfo(scale_lhs, offset_lhs);
+        weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+
+        const int        k      = weights_shape.x();
+        QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations */);
+
+        _dst_q_info = q_hint.q_info;
+        _min_bias   = q_hint.bias_min;
+        _max_bias   = q_hint.bias_max;
+
+        // Do not change these limits: they are the natural limits of the associated data types
+        // and are embedded in the computation of the dst quantization info.
+        _min_u8 = 0;
+        _max_u8 = 255;
+        _min_s8 = -128;
+        _max_s8 = 127;
+    }
+
     void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
                DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false)
     {
@@ -64,7 +98,20 @@ public:
         _mixed_layout   = mixed_layout;
         _data_type      = data_type;
         _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
-        _quantization_info = quantization_info;
+
+        // Note: The quantization_info parameter is only honoured when the data type is quantized and a fused activation other than identity is enabled; for quantized runs without such an activation it is ignored and the quantization info is generated dynamically.
+        if(is_data_type_quantized(data_type) && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+        {
+            // Initialise the quantization info with an appropriate scale and offset for the given input shapes.
+            setup_quantization(weights_shape, output_shape, _input_q_info, _weight_q_info, data_type);
+        }
+        else
+        {
+            _input_q_info  = quantization_info;
+            _weight_q_info = quantization_info;
+            _dst_q_info    = quantization_info;
+        }
+
         _activation_info = activation_info;
 
         _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
@@ -92,17 +139,17 @@ protected:
     {
         if(_data_type == DataType::QASYMM8)
         {
-            std::uniform_int_distribution<uint32_t> distribution(0, 30);
+            std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
             library->fill(tensor, distribution, i);
         }
         else if(_data_type == DataType::QASYMM8_SIGNED)
         {
-            std::uniform_int_distribution<int32_t> distribution(-15, 15);
+            std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
             library->fill(tensor, distribution, i);
         }
         else if(_data_type == DataType::S32)
        {
-            std::uniform_int_distribution<int32_t> distribution(-50, 50);
+            std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
             library->fill(tensor, distribution, i);
         }
         else if(_data_type == DataType::F16)
@@ -144,10 +191,10 @@ protected:
         }
 
         // Create tensors
-        TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info);
-        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _quantization_info);
-        TensorType bias    = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info);
-        TensorType dst     = create_tensor<TensorType>(output_shape, _data_type, 1, _quantization_info);
+        TensorType src     = create_tensor<TensorType>(input_shape, _data_type, 1, _input_q_info);
+        TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _weight_q_info);
+        TensorType bias    = create_tensor<TensorType>(bias_shape, _bias_data_type, 1);
+        TensorType dst     = create_tensor<TensorType>(output_shape, _data_type, 1, _dst_q_info);
 
         // Create Fully Connected layer info
         FullyConnectedLayerInfo fc_info;
@@ -178,8 +225,8 @@ protected:
         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
-        fill(AccessorType(src), 0);
-        fill(AccessorType(bias), 2);
+        fill(AccessorType(src), 0 + _hash);
+        fill(AccessorType(bias), 2 + _hash);
 
         if(!reshape_weights || !transpose_weights)
         {
@@ -187,7 +234,7 @@ protected:
             RawTensor tmp(tmp_shape, _data_type, 1);
 
             // Fill with original shape
-            fill(tmp, 1);
+            fill(tmp, 1 + _hash);
 
             // Transpose elementwise
             tmp = transpose(tmp);
@@ -204,7 +251,7 @@ protected:
         }
         else
         {
-            fill(AccessorType(weights), 1);
+            fill(AccessorType(weights), 1 + _hash);
         }
 
         if(_mixed_layout)
@@ -223,16 +270,16 @@ protected:
     SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape)
     {
         // Create reference
-        SimpleTensor<T>     src{ input_shape, _data_type, 1, _quantization_info };
-        SimpleTensor<T>     weights{ weights_shape, _data_type, 1, _quantization_info };
-        SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _quantization_info };
+        SimpleTensor<T>     src{ input_shape, _data_type, 1, _input_q_info };
+        SimpleTensor<T>     weights{ weights_shape, _data_type, 1, _weight_q_info };
+        SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, QuantizationInfo() };
 
         // Fill reference
-        fill(src, 0);
-        fill(weights, 1);
-        fill(bias, 2);
+        fill(src, 0 + _hash);
+        fill(weights, 1 + _hash);
+        fill(bias, 2 + _hash);
 
-        return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _quantization_info), _activation_info, _quantization_info);
+        return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _dst_q_info), _activation_info, _dst_q_info);
     }
 
     TensorType _target{};
@@ -240,8 +287,22 @@ protected:
     DataType            _data_type{};
     DataType            _bias_data_type{};
     bool                _mixed_layout{ false };
-    QuantizationInfo    _quantization_info{};
+    QuantizationInfo    _input_q_info{};
+    QuantizationInfo    _weight_q_info{};
+    QuantizationInfo    _dst_q_info{};
     ActivationLayerInfo _activation_info{};
+
+    // Random initialization limits
+    // Default values are previously handcrafted limits
+    // that should be used when we don't use dynamic quantization
+    int32_t _min_bias{-50};
+    int32_t _max_bias{50};
+
+    int32_t _min_u8{0};
+    int32_t _max_u8{30};
+    int32_t _min_s8{-15};
+    int32_t _max_s8{15};
+    int     _hash{0};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -289,12 +350,17 @@ private:
         }
         else if(_data_type == DataType::QASYMM8)
         {
-            std::uniform_int_distribution<uint32_t> distribution(0, 30);
+            std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
+            library->fill(tensor, distribution, i);
+        }
+        else if(_data_type == DataType::QASYMM8_SIGNED)
+        {
+            std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
             library->fill(tensor, distribution, i);
         }
         else if(_data_type == DataType::S32)
         {
-            std::uniform_int_distribution<int32_t> distribution(-50, 50);
+            std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
             library->fill(tensor, distribution, i);
         }
         else
@@ -352,6 +418,40 @@ private:
         validate(AccessorType(target), ref, tolerance_qasymm8_signed);
     }
 
+    void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+    {
+        _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+
+        const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+        const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+        std::mt19937                           generator(library->seed() + _hash);
+        std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+        std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+        const float   scale_lhs  = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const float   scale_rhs  = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const int32_t offset_lhs = distribution_t(generator);
+        const int32_t offset_rhs = distribution_t(generator);
+
+        input_q_info   = QuantizationInfo(scale_lhs, offset_lhs);
+        weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+        const int        k      = weights_shape.x();
+        QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations */);
+
+        _dst_q_info = q_hint.q_info;
+        _min_bias   = q_hint.bias_min;
+        _max_bias   = q_hint.bias_max;
+
+        // Do not change these limits: they are the natural limits of the associated data types
+        // and are embedded in the computation of the dst quantization info.
+        _min_u8 = 0;
+        _max_u8 = 255;
+        _min_s8 = -128;
+        _max_s8 = 127;
+    }
+
 public:
     using TDecay = typename std::decay<T>::type;
     using TBias  = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
@@ -364,15 +464,22 @@ public:
         const bool     is_quantized   = is_data_type_quantized(data_type);
         const DataType bias_data_type = (is_quantized) ? DataType::S32 : data_type;
-        const QuantizationInfo src_qinfo     = is_quantized ? QuantizationInfo(0.1f, 10) : QuantizationInfo();
-        const QuantizationInfo weights_qinfo = is_quantized ? QuantizationInfo(0.3f, 20) : QuantizationInfo();
-        const QuantizationInfo dst_qinfo     = is_quantized ? QuantizationInfo(0.2f, 5) : QuantizationInfo();
+        if (is_quantized && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+        {
+            setup_quantization(weights_shape, dst_shape, _src_q_info, _weights_q_info, data_type);
+        }
+        else
+        {
+            _src_q_info     = QuantizationInfo(0.1f, 10);
+            _dst_q_info     = QuantizationInfo(0.3f, 20);
+            _weights_q_info = QuantizationInfo(0.2f, 5);
+        }
 
         // Configure TensorInfo Objects
-        const TensorInfo src_info(src_shape, 1, data_type, src_qinfo);
-        const TensorInfo dst_info(dst_shape, 1, data_type, dst_qinfo);
+        const TensorInfo src_info(src_shape, 1, data_type, _src_q_info);
+        const TensorInfo dst_info(dst_shape, 1, data_type, _dst_q_info);
         TensorInfo       bias_info(bias_shape, 1, bias_data_type);
-        TensorInfo       wei_info(weights_shape, 1, data_type, weights_qinfo);
+        TensorInfo       wei_info(weights_shape, 1, data_type, _weights_q_info);
 
         if(!constant_weights && weights_reshaped)
         {
@@ -412,20 +519,20 @@ public:
         int randomizer_offset = 0;
 
         // Create reference tensors
-        SimpleTensor<T> src{ src_shape, data_type, 1, src_qinfo };
-        SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_qinfo };
+        SimpleTensor<T> src{ src_shape, data_type, 1, _src_q_info };
+        SimpleTensor<T> weights{ weights_shape, data_type, 1, _weights_q_info };
         SimpleTensor<TBias> bias{ bias_shape, bias_data_type };
 
         // Fill weights and/or bias if they remain constant
         if(constant_weights)
         {
-            fill(AccessorType(_weights), 1);
-            fill(weights, 1);
+            fill(AccessorType(_weights), 1 + _hash);
+            fill(weights, 1 + _hash);
         }
         if(constant_bias && !remove_bias)
        {
-            fill(AccessorType(_bias), 2);
-            fill(bias, 2);
+            fill(AccessorType(_bias), 2 + _hash);
+            fill(bias, 2 + _hash);
         }
         // To remove bias, fill with 0
         if(remove_bias && is_quantized)
        {
@@ -446,16 +553,16 @@ public:
         {
             if(weights_reshaped)
             {
-                fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1);
+                fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1 + _hash);
             }
             else
             {
-                fill(AccessorType(_weights), randomizer_offset + 1);
+                fill(AccessorType(_weights), randomizer_offset + 1 + _hash);
             }
         }
         if(!constant_bias && !remove_bias)
         {
-            fill(AccessorType(_bias), randomizer_offset + 2);
+            fill(AccessorType(_bias), randomizer_offset + 2 + _hash);
         }
 
         fc.run();
@@ -467,14 +574,14 @@ public:
         fill(src, randomizer_offset);
         if(!constant_weights)
         {
-            fill(weights, randomizer_offset + 1);
+            fill(weights, randomizer_offset + 1 + _hash);
         }
         if(!constant_bias && !remove_bias)
         {
-            fill(bias, randomizer_offset + 2);
+            fill(bias, randomizer_offset + 2 + _hash);
         }
 
-        auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, dst_qinfo), activation_info, dst_qinfo);
+        auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, _dst_q_info), activation_info, _dst_q_info);
 
         // Validate
         validate_with_tolerance(_dst, dst);
@@ -487,6 +594,22 @@ public:
 private:
     TensorType _src{}, _weights{}, _bias{}, _dst{};
     DataType   _data_type{ DataType::UNKNOWN };
+
+    QuantizationInfo _src_q_info{};
+    QuantizationInfo _weights_q_info{};
+    QuantizationInfo _dst_q_info{};
+
+    // Random initialization limits
+    // Default values are previously handcrafted limits
+    // that should be used when we don't use dynamic quantization
+    int32_t _min_bias{-50};
+    int32_t _max_bias{50};
+
+    int32_t _min_u8{0};
+    int32_t _max_u8{30};
+    int32_t _min_s8{-15};
+    int32_t _max_s8{15};
+    int     _hash{0};
 };
 
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -521,7 +644,7 @@ public:
                DataType data_type, ActivationLayerInfo activation_info)
     {
         FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T, true, false>::setup(src_shape, weights_shape, bias_shape,
-                                                                                                               dst_shape, data_type, activation_info, true, false, false, false /* weights_reshaped (not used) */);
+                                                                                                               dst_shape, data_type, activation_info, true, false, false, false);
     }
 };
 } // namespace validation
-- 
cgit v1.2.1
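
A note on the dst quantization hint used by both fixtures: suggest_mac_dst_q_info_and_bias() picks a destination scale and offset so that a K-deep multiply-accumulate over the randomly quantized inputs lands inside the representable output range, and it derives the bias fill limits from the same statistics. The standalone sketch below only illustrates that idea and is not the library implementation: the function name suggest_dst_qinfo, the assumption that inputs are uniform over the full unsigned 8-bit range, and the fixed [0, 255] output mapping are simplifications made here, and the bias-range suggestion is omitted.

    // Illustrative sketch only; not the ComputeLibrary implementation.
    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <utility>

    // Suggest {scale, offset} for the destination of a K-deep MAC whose quantized
    // inputs are assumed uniform over the full [0, 255] range of their type.
    std::pair<float, int32_t> suggest_dst_qinfo(float scale_x, int32_t offset_x,
                                                float scale_w, int32_t offset_w,
                                                int k, float num_std_devs)
    {
        // Mean and variance of a uniform integer in [lo, hi], shifted by the zero point.
        auto uniform_stats = [](int lo, int hi, int32_t offset)
        {
            const float mean = 0.5f * (lo + hi) - static_cast<float>(offset);
            const float n    = static_cast<float>(hi - lo + 1);
            return std::make_pair(mean, (n * n - 1.0f) / 12.0f);
        };

        const auto [mu_x, var_x] = uniform_stats(0, 255, offset_x);
        const auto [mu_w, var_w] = uniform_stats(0, 255, offset_w);

        // For independent terms: E[acc] = K * E[x] * E[w] and
        // Var[acc] = K * (VxVw + Vx * mu_w^2 + Vw * mu_x^2).
        const float acc_mean = static_cast<float>(k) * mu_x * mu_w;
        const float acc_std  = std::sqrt(static_cast<float>(k) *
                                         (var_x * var_w + var_x * mu_w * mu_w + var_w * mu_x * mu_x));

        // Real-valued output interval covered by mean +/- N standard deviations.
        const float lo = scale_x * scale_w * (acc_mean - num_std_devs * acc_std);
        const float hi = scale_x * scale_w * (acc_mean + num_std_devs * acc_std);

        // Map that interval onto the 8-bit output range [0, 255].
        const float   scale  = (hi - lo) / 255.0f;
        const int32_t offset = static_cast<int32_t>(std::lround(-lo / scale));
        return std::make_pair(scale, offset);
    }

    int main()
    {
        // Example: K = 64 accumulations with scales/offsets the fixture might draw.
        const auto [scale, offset] = suggest_dst_qinfo(0.25f, 10, 0.5f, -3, 64, 4.0f);
        std::cout << "dst scale = " << scale << ", dst offset = " << offset << "\n";
        return 0;
    }

With input scales drawn from [2^-5, 2^3] as in setup_quantization(), covering mean +/- 4 standard deviations keeps saturation rare: for a Gaussian-like accumulator roughly 1 output in 15,000 would fall outside the suggested range.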