From edf5230830f8d9794ef0aeb8986da99734ff925f Mon Sep 17 00:00:00 2001
From: Francis Murtagh
Date: Fri, 7 Jun 2019 11:28:49 +0100
Subject: IVGCVSW-3228 Fix bias quantization to be INT32 not QAsymm8

* Add function to calculate bias tensor quantization scale from input
  and weights scales.
* Change visitor method of Conv2d, DepthwiseConv and FullyConnected
  to use the new function.
* Fix Unit tests to expect correctly calculated quantization parameters.

Change-Id: Ic36f47ceea81243c813d74ccf791e984c819cc71
Signed-off-by: Francis Murtagh
---
 src/armnn/QuantizerVisitor.cpp   | 59 +++++++++++++++++++++++++---
 src/armnn/QuantizerVisitor.hpp   |  6 +++
 src/armnn/test/QuantizerTest.cpp | 84 ++++++++++++++++++++------------------
 3 files changed, 106 insertions(+), 43 deletions(-)

diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 47ddc4ed29..7ba56757c2 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -24,12 +24,15 @@ QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker,
 void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
                                                     IConnectableLayer* quantizedLayer)
 {
+    BOOST_ASSERT(srcLayer);
     for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
     {
         const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
         const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+        BOOST_ASSERT(inputSlot);
         const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+        BOOST_ASSERT(outputSlot);
         unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
         Layer& layerToFind = outputSlot->GetOwningLayer();
 
@@ -60,6 +63,50 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
     }
 }
 
+ConstTensor QuantizerVisitor::CreateQuantizedBias(const IConnectableLayer* srcLayer,
+                                                  const ConstTensor& weights,
+                                                  const Optional<ConstTensor>& biases,
+                                                  std::vector<int32_t>& backing)
+{
+    BOOST_ASSERT(srcLayer);
+    const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
+    auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+    BOOST_ASSERT(inputSlot);
+    const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+    BOOST_ASSERT(outputSlot);
+    unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+    Layer& layerToFind = outputSlot->GetOwningLayer();
+
+    auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+    if (found == m_OriginalToQuantizedGuidMap.end())
+    {
+        // Error in graph traversal order
+        BOOST_ASSERT_MSG(false, "Error in graph traversal");
+        return biases.value();
+    }
+
+    // Fetch the min/max ranges that were computed earlier
+    auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+    OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+
+    // Get the quantization scale based on input and weight scale
+    float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
+
+    // Set up quantized bias tensor info and allocate space
+    TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
+    backing.resize(biases.value().GetInfo().GetNumElements());
+
+    // Convert values to int32
+    for (size_t i = 0; i < backing.size(); ++i)
+    {
+        float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
+        backing[i] = boost::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
+    }
+
+    return ConstTensor(qInfo, backing);
+}
+
 void QuantizerVisitor::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
 {
     m_OriginalToQuantizedGuidMap[srcLayer->GetGuid()] = quantizedLayer->GetGuid();
@@ -151,11 +198,11 @@ void QuantizerVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
@@ -177,11 +224,11 @@ void QuantizerVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
@@ -203,11 +250,11 @@ void QuantizerVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
     std::vector<uint8_t> weightsBacking;
     ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
 
     Optional<ConstTensor> optionalQBiases;
-    std::vector<uint8_t> biasesBacking;
+    std::vector<int32_t> biasesBacking;
 
     if (biases.has_value())
     {
-        ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+        ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
         optionalQBiases = Optional<ConstTensor>(qBiases);
     }
 
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 6e5609df02..6463350f2b 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -139,6 +139,12 @@ private:
     /// Record the guids so we can easily find the layers later
     void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer);
 
+    /// Sets the bias quantization scale based on input and weight scales
+    ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer,
+                                    const ConstTensor& weights,
+                                    const Optional<ConstTensor>& biases,
+                                    std::vector<int32_t>& weightsBacking);
+
     /// Reference to the static range visitor used to retrieve the quantization ranges
     const RangeTracker& m_Ranges;
 
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 337c61585f..213018ab85 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -86,11 +86,54 @@ protected:
         }
     }
 
-    void TestConstantQuantizationParams(const TensorInfo& info, const OffsetScalePair& params)
+    void TestConstantQuantizationParams(const TensorInfo& info,
+                                        const OffsetScalePair& params,
+                                        DataType dataType = DataType::QuantisedAsymm8)
     {
         TestQuantizationParamsImpl(info, DataType::QuantisedAsymm8, params.first, params.second);
     }
 
+    void TestBiasQuantizationParams(const TensorInfo& info,
+                                    const OffsetScalePair& qAsymm8Params,
+                                    const OffsetScalePair& qSymm16Params,
+                                    DataType dataType = DataType::QuantisedAsymm8)
+    {
+        switch (m_QuantizerOptions.m_ActivationFormat)
+        {
+            case DataType::QuantisedAsymm8:
+                TestQuantizationParamsImpl(info, dataType, qAsymm8Params.first, qAsymm8Params.second);
+                break;
+            case DataType::QuantisedSymm16:
+                TestQuantizationParamsImpl(info, dataType, qSymm16Params.first, qSymm16Params.second);
+                break;
+            default:
+                throw InvalidArgumentException("Unsupported quantization target");
+        }
+    }
+
+    void TestQuantizationOnLayersWithBiases(const IConnectableLayer* layer,
+                                            const ConstTensor& weights,
+                                            const Optional<ConstTensor>& biases)
+    {
+        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
+        float inputScaleQAsymm8 = 30.0f / g_Asymm8QuantizationBase;
+        float inputScaleQSymm16 = 15.0f / g_Symm16QuantizationBase;
+        float weightsScale = 3.0f / g_Asymm8QuantizationBase;
+
+        // Based off default static range [-15.0f, 15.0f]
+        TestQuantizationParams(info, {inputScaleQAsymm8, 128}, {inputScaleQSymm16, 0});
+
+        TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85});
+
+        if (biases.has_value())
+        {
+            TestBiasQuantizationParams(biases.value().GetInfo(),
+                                       {inputScaleQAsymm8 * weightsScale, 0},
+                                       {inputScaleQSymm16 * weightsScale, 0},
+                                       DataType::Signed32);
+        }
+    }
+
     TensorShape m_InputShape;
     TensorShape m_OutputShape;
 
@@ -726,18 +769,7 @@ void ValidateFullyConnectedLayer(const bool biasEnabled)
                                   const Optional<ConstTensor>& biases,
                                   const char* name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {30.0f / g_Asymm8QuantizationBase, 0});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
     }
 };
 
@@ -783,18 +815,7 @@ void TestQuantizeConvolution2d(bool useBiases)
                                   const Optional<ConstTensor>& biases,
                                   const char *name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
     }
 };
 
@@ -869,18 +890,7 @@ void TestQuantizeDepthwiseConvolution2d(bool useBiases)
                                   const Optional<ConstTensor>& biases,
                                   const char *name = nullptr) override
     {
-        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
-        // Based off default static range [-15.0f, 15.0f]
-        TestQuantizationParams(
-            info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
-        TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
-        if (biases.has_value())
-        {
-            TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-        }
+        TestQuantizationOnLayersWithBiases(layer, weights, biases);
    }
 };
 
-- 
cgit v1.2.1
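
For reference, the rule this patch implements can be shown in isolation: a bias feeding a layer whose input has quantization scale Si and whose weights have scale Sw is quantized to INT32 with scale Sb = Si * Sw and zero point 0. The following is a minimal, hypothetical C++ sketch of that arithmetic, not ArmNN code; the helper name QuantizeBias and the hard-coded scales are assumptions for illustration. It also rounds to nearest, whereas the patch converts with boost::numeric_cast, which truncates toward zero.

    // Standalone sketch of the bias-quantization rule; illustrative only,
    // not part of the ArmNN API.
    #include <cmath>
    #include <cstddef>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Hypothetical helper mirroring what CreateQuantizedBias computes:
    // bias scale = input scale * weights scale, stored as int32, zero point 0.
    std::vector<int32_t> QuantizeBias(const std::vector<float>& fp32Biases,
                                      float inputScale,
                                      float weightsScale)
    {
        const float biasScale = inputScale * weightsScale;
        std::vector<int32_t> quantized(fp32Biases.size());
        for (std::size_t i = 0; i < fp32Biases.size(); ++i)
        {
            // Round to nearest for illustration; the patch truncates via
            // boost::numeric_cast instead.
            quantized[i] = static_cast<int32_t>(std::round(fp32Biases[i] / biasScale));
        }
        return quantized;
    }

    int main()
    {
        // Scales matching the unit tests above: input range [-15, 15] on
        // QAsymm8 gives inputScale = 30/255; weightsScale = 3/255.
        const float inputScale   = 30.0f / 255.0f;
        const float weightsScale = 3.0f / 255.0f;

        const std::vector<float> biases = {0.5f, -0.25f, 1.0f};
        for (int32_t q : QuantizeBias(biases, inputScale, weightsScale))
        {
            std::cout << q << '\n'; // e.g. 0.5f maps to roughly 361
        }
        return 0;
    }

With these scales the bias scale is roughly 0.00138, so 0.5f maps to about 361 and dequantizes back to about 0.4997. An 8-bit QAsymm8 bias, the behaviour this patch removes, can represent only 256 distinct values, which is why storing biases as INT32 with the combined input * weights scale is needed to preserve bias precision.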