From edf5230830f8d9794ef0aeb8986da99734ff925f Mon Sep 17 00:00:00 2001
From: Francis Murtagh
Date: Fri, 7 Jun 2019 11:28:49 +0100
Subject: IVGCVSW-3228 Fix bias quantization to be INT32 not QAsymm8

* Add function to calculate bias tensor quantization scale from input
  and weights scales.
* Change visitor method of Conv2d, DepthwiseConv and FullyConnected
  to use the new function.
* Fix Unit tests to expect correctly calculated quantization parameters.

Change-Id: Ic36f47ceea81243c813d74ccf791e984c819cc71
Signed-off-by: Francis Murtagh
---
 src/armnn/QuantizerVisitor.cpp   | 59 +++++++++++++++++++++++++---
 src/armnn/QuantizerVisitor.hpp   |  6 +++
 src/armnn/test/QuantizerTest.cpp | 84 ++++++++++++++++++++------------------
 3 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 47ddc4ed29..7ba56757c2 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -24,12 +24,15 @@ QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker,
 void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
                                                     IConnectableLayer* quantizedLayer)
 {
+    BOOST_ASSERT(srcLayer);
     for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
     {
         const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
         const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+        BOOST_ASSERT(inputSlot);
         const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+        BOOST_ASSERT(outputSlot);
         unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
         Layer& layerToFind = outputSlot->GetOwningLayer();
 
@@ -60,6 +63,50 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
     }
 }
 
+ConstTensor QuantizerVisitor::CreateQuantizedBias(const IConnectableLayer* srcLayer,
+                                                  const ConstTensor& weights,
+                                                  const Optional<ConstTensor>& biases,
+                                                  std::vector<int32_t>& backing)
+{
+    BOOST_ASSERT(srcLayer);
+    const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
+    auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+    BOOST_ASSERT(inputSlot);
+    const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+    BOOST_ASSERT(outputSlot);
+    unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+    Layer& layerToFind = outputSlot->GetOwningLayer();
+
+    auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+    if (found == m_OriginalToQuantizedGuidMap.end())
+    {
+        // Error in graph traversal order
+        BOOST_ASSERT_MSG(false, "Error in graph traversal");
+        return biases.value();
+    }
+
+    // Fetch the min/max ranges that were computed earlier
+    auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+    OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+
+    // Get the quantization scale based on input and weight scale
+    float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
+
+    // Set up quantized bias tensor info and allocate space
+    TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
+    backing.resize(biases.value().GetInfo().GetNumElements());
+
+    // Convert values to int32
+    for (size_t i = 0; i < backing.size(); ++i)
+    {
+        float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
+        backing[i] = boost::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
+    }
+
+    return ConstTensor(qInfo, backing);
+}
+
 void QuantizerVisitor::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
 {
     m_OriginalToQuantizedGuidMap[srcLayer->GetGuid()] = quantizedLayer->GetGuid();
@@ -151,11 +198,11 @@ void QuantizerVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
@@ -177,11 +224,11 @@ void QuantizerVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
@@ -203,11 +250,11 @@ void QuantizerVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 6e5609df02..6463350f2b 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -139,6 +139,12 @@ private:
     /// Record the guids so we can easily find the layers later
     void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer);
 
+    /// Sets the bias quantization scale based on input and weight scales
+    ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer,
+                                    const ConstTensor& weights,
+                                    const Optional<ConstTensor>& biases,
+                                    std::vector<int32_t>& weightsBacking);
+
     /// Reference to the static range visitor used to retrieve the quantization ranges
     const RangeTracker& m_Ranges;
 
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 337c61585f..213018ab85 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -86,11 +86,54 @@ protected:
         }
     }
 
-    void TestConstantQuantizationParams(const TensorInfo& info, const OffsetScalePair& params)
+    void TestConstantQuantizationParams(const TensorInfo& info,
+                                        const OffsetScalePair& params,
+                                        DataType dataType = DataType::QuantisedAsymm8)
     {
         TestQuantizationParamsImpl(info, DataType::QuantisedAsymm8, params.first, params.second);
     }
 
+    void TestBiasQuantizationParams(const TensorInfo& info,
+                                    const OffsetScalePair& qAsymm8Params,
+                                    const OffsetScalePair& qSymm16Params,
+                                    DataType dataType = DataType::QuantisedAsymm8)
+    {
+        switch (m_QuantizerOptions.m_ActivationFormat)
+        {
+            case DataType::QuantisedAsymm8:
+                TestQuantizationParamsImpl(info, dataType, qAsymm8Params.first, qAsymm8Params.second);
+                break;
+            case DataType::QuantisedSymm16:
+                TestQuantizationParamsImpl(info, dataType, qSymm16Params.first, qSymm16Params.second);
+                break;
+            default:
+                throw InvalidArgumentException("Unsupported quantization target");
+        }
+    }
+
+    void TestQuantizationOnLayersWithBiases(const IConnectableLayer* layer,
+                                            const ConstTensor& weights,
+                                            const Optional<ConstTensor>& biases)
+    {
+        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
+        float inputScaleQAsymm8 = 30.0f / g_Asymm8QuantizationBase;
+        float inputScaleQSymm16 = 15.0f / g_Symm16QuantizationBase;
+        float weightsScale = 3.0f / g_Asymm8QuantizationBase;
+
+        // Based off default static range [-15.0f, 15.0f]
+        TestQuantizationParams(info, {inputScaleQAsymm8, 128}, {inputScaleQSymm16, 0});
+
+        TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85});
+
+        if (biases.has_value())
+        {
+            TestBiasQuantizationParams(biases.value().GetInfo(),
+                                       {inputScaleQAsymm8 * weightsScale, 0},
+                                       {inputScaleQSymm16 * weightsScale, 0},
+                                       DataType::Signed32);
+        }
+    }
+
     TensorShape m_InputShape;
     TensorShape m_OutputShape;
 
@@ -726,18 +769,7 @@ void ValidateFullyConnectedLayer(const bool biasEnabled)
                                   const Optional<ConstTensor>& biases,
                                   const char* name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {30.0f / g_Asymm8QuantizationBase, 0});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
     }
 };
 
@@ -783,18 +815,7 @@ void TestQuantizeConvolution2d(bool useBiases)
                                   const Optional<ConstTensor>& biases,
                                   const char *name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
     }
 };
 
@@ -869,18 +890,7 @@ void TestQuantizeDepthwiseConvolution2d(bool useBiases)
                                   const Optional<ConstTensor>& biases,
                                   const char *name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
    }
 };
 
-- 
cgit v1.2.1
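
For reference, the rule this patch implements can be shown in isolation: a bias feeding a layer whose input has quantization scale Si and whose weights have scale Sw is quantized to INT32 with scale Sb = Si * Sw and zero point 0. The following is a minimal, hypothetical C++ sketch of that arithmetic, not ArmNN code; the helper name QuantizeBias and the hard-coded scales are assumptions for illustration. It also rounds to nearest, whereas the patch converts with boost::numeric_cast, which truncates toward zero.

    // Standalone sketch of the bias-quantization rule; illustrative only,
    // not part of the ArmNN API.
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Hypothetical helper mirroring what CreateQuantizedBias computes:
    // bias scale = input scale * weights scale, stored as int32, zero point 0.
    std::vector<int32_t> QuantizeBias(const std::vector<float>& fp32Biases,
                                      float inputScale,
                                      float weightsScale)
    {
        const float biasScale = inputScale * weightsScale;
        std::vector<int32_t> quantized(fp32Biases.size());
        for (std::size_t i = 0; i < fp32Biases.size(); ++i)
        {
            // Round to nearest for illustration; the patch truncates via
            // boost::numeric_cast instead.
            quantized[i] = static_cast<int32_t>(std::round(fp32Biases[i] / biasScale));
        }
        return quantized;
    }

    int main()
    {
        // Scales matching the unit tests above: input range [-15, 15] on
        // QAsymm8 gives inputScale = 30/255; weightsScale = 3/255.
        const float inputScale   = 30.0f / 255.0f;
        const float weightsScale = 3.0f / 255.0f;

        const std::vector<float> biases = {0.5f, -0.25f, 1.0f};
        for (int32_t q : QuantizeBias(biases, inputScale, weightsScale))
        {
            std::cout << q << '\n'; // e.g. 0.5f maps to roughly 361
        }
        return 0;
    }

With these scales the bias scale is roughly 0.00138, so 0.5f maps to about 361 and dequantizes back to about 0.4997. An 8-bit QAsymm8 bias, the behaviour this patch removes, can represent only 256 distinct values, which is why storing biases as INT32 with the combined input * weights scale is needed to preserve bias precision.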