author    Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>  2023-10-26 00:14:36 +0100
committer Mohmun02 <MohammedSuhail.Munshi@arm.com>  2023-11-03 14:01:37 +0000
commit    02c452fe1ec17c3941272a07b5cae1f32d614c56 (patch)
tree      3fc26399885bfefe6b9b086ea2b7fefc7eff54e2 /tests/validation/fixtures/FullyConnectedLayerFixture.h
parent    c259aa5e04714bb3a8d23a6903161c240c279743 (diff)
Add Dynamic Quantization tests to Fully Connected Layer
This patch calculates the output quantization info based on the inputs'
quantization information. The previous approach used the same quantization
information for the input, weights and output. This implementation does not
cover the cases with a fused activation function.

Resolves: [COMPMID-6484]

Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ib58143165191e82ae8547e661ac7c8d077bda200
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10539
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
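For readers unfamiliar with the helper used in the hunks below: suggest_mac_dst_q_info_and_bias() asks the test library for a destination quantization info and a bias range that fit the accumulator of a k-deep multiply-accumulate. The following is a minimal, self-contained C++ sketch of that idea only; the simplified QuantizationInfo/QuantizationHint types and the sqrt(k)-based scaling heuristic are assumptions for illustration, not the library's actual implementation.

    #include <cmath>
    #include <cstdint>
    #include <iostream>

    // Simplified stand-ins for the library types referenced in this patch.
    struct QuantizationInfo
    {
        QuantizationInfo() = default;
        QuantizationInfo(float s, int32_t o) : scale(s), offset(o) {}
        float   scale{ 1.f };
        int32_t offset{ 0 };
    };

    struct QuantizationHint
    {
        QuantizationInfo q_info{};
        int32_t          bias_min{ 0 };
        int32_t          bias_max{ 0 };
    };

    // Illustrative heuristic: pick a dst scale wide enough to hold num_std_devs
    // standard deviations of a k-deep accumulation of lhs*rhs products, and keep
    // the bias to a fraction of that accumulator range.
    QuantizationHint suggest_dst_q_info_sketch(const QuantizationInfo &lhs, const QuantizationInfo &rhs,
                                               int k, float bias_fraction, int num_std_devs)
    {
        const float acc_scale = lhs.scale * rhs.scale;
        const float dst_scale = acc_scale * num_std_devs * std::sqrt(static_cast<float>(k));

        QuantizationHint hint;
        hint.q_info = QuantizationInfo(dst_scale, 0);

        const int32_t bias_range = static_cast<int32_t>(bias_fraction * static_cast<float>(k) * 127.f);
        hint.bias_min = -bias_range;
        hint.bias_max = bias_range;
        return hint;
    }

    int main()
    {
        // Example: 8-bit quantized inputs, MAC depth of 64.
        const QuantizationHint hint = suggest_dst_q_info_sketch(QuantizationInfo(0.25f, 10),
                                                                QuantizationInfo(0.5f, -3),
                                                                64 /* k */, 0.1f /* bias_fraction */, 4 /* std devs */);
        std::cout << "dst scale: " << hint.q_info.scale
                  << ", bias range: [" << hint.bias_min << ", " << hint.bias_max << "]\n";
    }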
Diffstat (limited to 'tests/validation/fixtures/FullyConnectedLayerFixture.h')
-rw-r--r--  tests/validation/fixtures/FullyConnectedLayerFixture.h  205
1 file changed, 164 insertions, 41 deletions
diff --git a/tests/validation/fixtures/FullyConnectedLayerFixture.h b/tests/validation/fixtures/FullyConnectedLayerFixture.h
index 7cfe6e49b9..05f20ac12b 100644
--- a/tests/validation/fixtures/FullyConnectedLayerFixture.h
+++ b/tests/validation/fixtures/FullyConnectedLayerFixture.h
@@ -55,6 +55,40 @@ public:
using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
public:
+ void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ input_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+
+ const int k = weights_shape.x();
+ QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+
+ // Do not change these limits: they are the natural limits of the associated data types and
+ // are embedded in the computation of the dst quantization info.
+ _min_u8 = 0;
+ _max_u8 = 255;
+ _min_s8 = -128;
+ _max_s8 = 127;
+ }
+
void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, bool transpose_weights, bool reshape_weights,
DataType data_type, QuantizationInfo quantization_info, ActivationLayerInfo activation_info, bool mixed_layout = false)
{
@@ -64,7 +98,20 @@ public:
_mixed_layout = mixed_layout;
_data_type = data_type;
_bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
- _quantization_info = quantization_info;
+
+ // Note: The quantization_info parameter is only used when the data type is not quantized, or when a fused non-identity activation is enabled; otherwise dynamic quantization info is generated.
+ if(is_data_type_quantized(data_type) && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+ {
+ // Initialises quantization info with appropriate scale and offset for given input shapes.
+ setup_quantization(weights_shape, output_shape, _input_q_info, _weight_q_info, data_type);
+ }
+ else
+ {
+ _input_q_info = quantization_info;
+ _weight_q_info = quantization_info;
+ _dst_q_info = quantization_info;
+ }
+
_activation_info = activation_info;
_target = compute_target(input_shape, weights_shape, bias_shape, output_shape, transpose_weights, reshape_weights);
@@ -92,17 +139,17 @@ protected:
{
if(_data_type == DataType::QASYMM8)
{
- std::uniform_int_distribution<uint32_t> distribution(0, 30);
+ std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::QASYMM8_SIGNED)
{
- std::uniform_int_distribution<int32_t> distribution(-15, 15);
+ std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::S32)
{
- std::uniform_int_distribution<int32_t> distribution(-50, 50);
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::F16)
@@ -144,10 +191,10 @@ protected:
}
// Create tensors
- TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info);
- TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _quantization_info);
- TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info);
- TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _quantization_info);
+ TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _input_q_info);
+ TensorType weights = create_tensor<TensorType>(reshaped_weights_shape, _data_type, 1, _weight_q_info);
+ TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1);
+ TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _dst_q_info);
// Create Fully Connected layer info
FullyConnectedLayerInfo fc_info;
@@ -178,8 +225,8 @@ protected:
ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
// Fill tensors
- fill(AccessorType(src), 0);
- fill(AccessorType(bias), 2);
+ fill(AccessorType(src), 0 + _hash);
+ fill(AccessorType(bias), 2 + _hash);
if(!reshape_weights || !transpose_weights)
{
@@ -187,7 +234,7 @@ protected:
RawTensor tmp(tmp_shape, _data_type, 1);
// Fill with original shape
- fill(tmp, 1);
+ fill(tmp, 1 + _hash);
// Transpose elementwise
tmp = transpose(tmp);
@@ -204,7 +251,7 @@ protected:
}
else
{
- fill(AccessorType(weights), 1);
+ fill(AccessorType(weights), 1 + _hash);
}
if(_mixed_layout)
@@ -223,16 +270,16 @@ protected:
SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape)
{
// Create reference
- SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info };
- SimpleTensor<T> weights{ weights_shape, _data_type, 1, _quantization_info };
- SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _quantization_info };
+ SimpleTensor<T> src{ input_shape, _data_type, 1, _input_q_info };
+ SimpleTensor<T> weights{ weights_shape, _data_type, 1, _weight_q_info };
+ SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, QuantizationInfo() };
// Fill reference
- fill(src, 0);
- fill(weights, 1);
- fill(bias, 2);
+ fill(src, 0 + _hash);
+ fill(weights, 1 + _hash);
+ fill(bias, 2 + _hash);
- return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _quantization_info), _activation_info, _quantization_info);
+ return reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, output_shape, _dst_q_info), _activation_info, _dst_q_info);
}
TensorType _target{};
@@ -240,8 +287,22 @@ protected:
DataType _data_type{};
DataType _bias_data_type{};
bool _mixed_layout{ false };
- QuantizationInfo _quantization_info{};
+ QuantizationInfo _input_q_info{};
+ QuantizationInfo _weight_q_info{};
+ QuantizationInfo _dst_q_info{};
ActivationLayerInfo _activation_info{};
+
+ // Random initialization limits
+ // Default values are the previously handcrafted limits
+ // that should be used when we don't use dynamic quantization
+ int32_t _min_bias{-50};
+ int32_t _max_bias{50};
+
+ int32_t _min_u8{0};
+ int32_t _max_u8{30};
+ int32_t _min_s8{-15};
+ int32_t _max_s8{15};
+ int _hash{0};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
@@ -289,12 +350,17 @@ private:
}
else if(_data_type == DataType::QASYMM8)
{
- std::uniform_int_distribution<uint32_t> distribution(0, 30);
+ std::uniform_int_distribution<uint32_t> distribution(_min_u8, _max_u8);
+ library->fill(tensor, distribution, i);
+ }
+ else if(_data_type == DataType::QASYMM8_SIGNED)
+ {
+ std::uniform_int_distribution<int32_t> distribution(_min_s8, _max_s8);
library->fill(tensor, distribution, i);
}
else if(_data_type == DataType::S32)
{
- std::uniform_int_distribution<int32_t> distribution(-50, 50);
+ std::uniform_int_distribution<int32_t> distribution(_min_bias, _max_bias);
library->fill(tensor, distribution, i);
}
else
@@ -352,6 +418,40 @@ private:
validate(AccessorType(target), ref, tolerance_qasymm8_signed);
}
+ void setup_quantization(TensorShape weights_shape, TensorShape output_shape, QuantizationInfo &input_q_info, QuantizationInfo &weights_q_info, DataType data_type)
+ {
+ _hash = weights_shape[0] + weights_shape[1] + output_shape[0] + output_shape[1];
+
+ const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+ const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+ std::mt19937 generator(library->seed() + _hash);
+ std::uniform_real_distribution<float> distribution_float(-5.0f, 3.0f);
+ std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+ const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+ const int32_t offset_lhs = distribution_t(generator);
+ const int32_t offset_rhs = distribution_t(generator);
+
+ input_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+ weights_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+ const int k = weights_shape.x();
+ QuantizationHint q_hint = suggest_mac_dst_q_info_and_bias(input_q_info, weights_q_info, k, data_type, 0.1f /* bias_fraction */, 4 /* number of standard deviations*/);
+
+ _dst_q_info = q_hint.q_info;
+ _min_bias = q_hint.bias_min;
+ _max_bias = q_hint.bias_max;
+
+ // Do not change these limits: they are the natural limits of the associated data types and
+ // are embedded in the computation of the dst quantization info.
+ _min_u8 = 0;
+ _max_u8 = 255;
+ _min_s8 = -128;
+ _max_s8 = 127;
+ }
+
public:
using TDecay = typename std::decay<T>::type;
using TBias = typename std::conditional < (std::is_same<TDecay, uint8_t>::value || std::is_same<TDecay, int8_t>::value), int32_t, T >::type;
@@ -364,15 +464,22 @@ public:
const bool is_quantized = is_data_type_quantized(data_type);
const DataType bias_data_type = (is_quantized) ? DataType::S32 : data_type;
- const QuantizationInfo src_qinfo = is_quantized ? QuantizationInfo(0.1f, 10) : QuantizationInfo();
- const QuantizationInfo weights_qinfo = is_quantized ? QuantizationInfo(0.3f, 20) : QuantizationInfo();
- const QuantizationInfo dst_qinfo = is_quantized ? QuantizationInfo(0.2f, 5) : QuantizationInfo();
+ if (is_quantized && (!activation_info.enabled() || activation_info.activation() == ActivationFunction::IDENTITY))
+ {
+ setup_quantization(weights_shape, dst_shape, _src_q_info, _weights_q_info, data_type);
+ }
+ else
+ {
+ _src_q_info = QuantizationInfo(0.1f, 10);
+ _weights_q_info = QuantizationInfo(0.3f, 20);
+ _dst_q_info = QuantizationInfo(0.2f, 5);
+ }
// Configure TensorInfo Objects
- const TensorInfo src_info(src_shape, 1, data_type, src_qinfo);
- const TensorInfo dst_info(dst_shape, 1, data_type, dst_qinfo);
+ const TensorInfo src_info(src_shape, 1, data_type, _src_q_info);
+ const TensorInfo dst_info(dst_shape, 1, data_type, _dst_q_info);
TensorInfo bias_info(bias_shape, 1, bias_data_type);
- TensorInfo wei_info(weights_shape, 1, data_type, weights_qinfo);
+ TensorInfo wei_info(weights_shape, 1, data_type, _weights_q_info);
if(!constant_weights && weights_reshaped)
{
@@ -412,20 +519,20 @@ public:
int randomizer_offset = 0;
// Create reference tensors
- SimpleTensor<T> src{ src_shape, data_type, 1, src_qinfo };
- SimpleTensor<T> weights{ weights_shape, data_type, 1, weights_qinfo };
+ SimpleTensor<T> src{ src_shape, data_type, 1, _src_q_info };
+ SimpleTensor<T> weights{ weights_shape, data_type, 1, _weights_q_info };
SimpleTensor<TBias> bias{ bias_shape, bias_data_type };
// Fill weights and/or bias if they remain constant
if(constant_weights)
{
- fill(AccessorType(_weights), 1);
- fill(weights, 1);
+ fill(AccessorType(_weights), 1 + _hash);
+ fill(weights, 1 + _hash);
}
if(constant_bias && !remove_bias)
{
- fill(AccessorType(_bias), 2);
- fill(bias, 2);
+ fill(AccessorType(_bias), 2 + _hash);
+ fill(bias, 2 + _hash);
}
// To remove bias, fill with 0
if(remove_bias && is_quantized)
@@ -446,16 +553,16 @@ public:
{
if(weights_reshaped)
{
- fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1);
+ fill_transposed_weights(_weights, weights_shape, randomizer_offset + 1 + _hash);
}
else
{
- fill(AccessorType(_weights), randomizer_offset + 1);
+ fill(AccessorType(_weights), randomizer_offset + 1 + _hash);
}
}
if(!constant_bias && !remove_bias)
{
- fill(AccessorType(_bias), randomizer_offset + 2);
+ fill(AccessorType(_bias), randomizer_offset + 2 + _hash);
}
fc.run();
@@ -467,14 +574,14 @@ public:
fill(src, randomizer_offset);
if(!constant_weights)
{
- fill(weights, randomizer_offset + 1);
+ fill(weights, randomizer_offset + 1 + _hash);
}
if(!constant_bias && !remove_bias)
{
- fill(bias, randomizer_offset + 2);
+ fill(bias, randomizer_offset + 2 + _hash);
}
- auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, dst_qinfo), activation_info, dst_qinfo);
+ auto dst = reference::activation_layer(reference::fully_connected_layer<T>(src, weights, bias, dst_shape, _dst_q_info), activation_info, _dst_q_info);
// Validate
validate_with_tolerance(_dst, dst);
@@ -487,6 +594,22 @@ public:
private:
TensorType _src{}, _weights{}, _bias{}, _dst{};
DataType _data_type{ DataType::UNKNOWN };
+
+ QuantizationInfo _src_q_info{};
+ QuantizationInfo _weights_q_info{};
+ QuantizationInfo _dst_q_info{};
+
+ // Random initialization limits
+ // Default values are the previously handcrafted limits
+ // that should be used when we don't use dynamic quantization
+ int32_t _min_bias{-50};
+ int32_t _max_bias{50};
+
+ int32_t _min_u8{0};
+ int32_t _max_u8{30};
+ int32_t _min_s8{-15};
+ int32_t _max_s8{15};
+ int _hash{0};
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
@@ -521,7 +644,7 @@ public:
DataType data_type, ActivationLayerInfo activation_info)
{
FullyConnectedWithDynamicTensorsFixture<TensorType, AccessorType, FunctionType, T>::setup(src_shape, weights_shape, bias_shape,
- dst_shape, data_type, activation_info, true, false, false, false /* weights_reshaped (not used) */);
+ dst_shape, data_type, activation_info, true, false, false, false);
}
};
} // namespace validation