author     Francis Murtagh <francis.murtagh@arm.com>    2019-06-07 11:28:49 +0100
committer  Francis Murtagh <francis.murtagh@arm.com>    2019-06-07 12:23:15 +0000
commit     edf5230830f8d9794ef0aeb8986da99734ff925f
tree       e2f35f3088e594fdde2361af4b73fb23485f3702
parent     49ab7ada17a354caa5b6263c3a732e55d9cd2743
IVGCVSW-3228 Fix bias quantization to be INT32 not QAsymm8
* Add function to calculate bias tensor quantization scale from input and weights scales.
* Change visitor method of Conv2d, DepthwiseConv and FullyConnected to use the new function.
* Fix Unit tests to expect correctly calculated quantization parameters.

Change-Id: Ic36f47ceea81243c813d74ccf791e984c819cc71
Signed-off-by: Francis Murtagh <francis.murtagh@arm.com>
-rw-r--r--  src/armnn/QuantizerVisitor.cpp    59
-rw-r--r--  src/armnn/QuantizerVisitor.hpp     6
-rw-r--r--  src/armnn/test/QuantizerTest.cpp  84
3 files changed, 106 insertions(+), 43 deletions(-)
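The rule the patch implements is the standard one for quantized convolution-style layers: the bias is added to an INT32 accumulator holding sums of input-by-weight products, so it must be quantized to Signed32 with scale inputScale * weightsScale and zero point 0. A minimal standalone sketch of that rescaling follows, with illustrative names rather than the ArmNN API (the patch itself converts via boost::numeric_cast; round-to-nearest is shown here for clarity):

#include <cmath>
#include <cstddef>
#include <cstdint>
#include <vector>

// Quantize an FP32 bias to INT32 for a quantized conv/FC layer. The bias is
// accumulated against int32 (input x weight) products, so its scale must be
// inputScale * weightScale and its zero point must be 0.
std::vector<int32_t> QuantizeBias(const std::vector<float>& bias,
                                  float inputScale,
                                  float weightScale)
{
    const float biasScale = inputScale * weightScale;
    std::vector<int32_t> quantized(bias.size());
    for (std::size_t i = 0; i < bias.size(); ++i)
    {
        quantized[i] = static_cast<int32_t>(std::lround(bias[i] / biasScale));
    }
    return quantized;
}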
diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 47ddc4ed29..7ba56757c2 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -24,12 +24,15 @@ QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker,
void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
IConnectableLayer* quantizedLayer)
{
+ BOOST_ASSERT(srcLayer);
for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
{
const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+ BOOST_ASSERT(inputSlot);
const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+ BOOST_ASSERT(outputSlot);
unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
Layer& layerToFind = outputSlot->GetOwningLayer();
@@ -60,6 +63,50 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* src
}
}
+ConstTensor QuantizerVisitor::CreateQuantizedBias(const IConnectableLayer* srcLayer,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ std::vector<int32_t>& backing)
+{
+ BOOST_ASSERT(srcLayer);
+ const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
+ auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+ BOOST_ASSERT(inputSlot);
+ const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+ BOOST_ASSERT(outputSlot);
+ unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+ Layer& layerToFind = outputSlot->GetOwningLayer();
+
+ auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+ if (found == m_OriginalToQuantizedGuidMap.end())
+ {
+ // Error in graph traversal order
+ BOOST_ASSERT_MSG(false, "Error in graph traversal");
+ return biases.value();
+ }
+
+ // Fetch the min/max ranges that were computed earlier
+ auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+ OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+
+ // Get the quantization scale based on input and weight scale
+ float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
+
+ // Set up quantized bias tensor info and allocate space
+ TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
+ backing.resize(biases.value().GetInfo().GetNumElements());
+
+ // Convert values to int32
+ for (size_t i = 0; i < backing.size(); ++i)
+ {
+ float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
+ backing[i] = boost::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
+ }
+
+ return ConstTensor(qInfo, backing);
+}
+
void QuantizerVisitor::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
{
m_OriginalToQuantizedGuidMap[srcLayer->GetGuid()] = quantizedLayer->GetGuid();
@@ -151,11 +198,11 @@ void QuantizerVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
@@ -177,11 +224,11 @@ void QuantizerVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer*
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
@@ -203,11 +250,11 @@ void QuantizerVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
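For context on the ComputeScheme call in CreateQuantizedBias above: a representative asymmetric 8-bit scheme derives scale and offset from the tracked min/max range roughly as follows. This is a sketch of the general technique, not necessarily ArmNN's exact QAsymm8 implementation:

#include <algorithm>
#include <cmath>
#include <utility>

// Derive (scale, offset) for asymmetric 8-bit quantization from a min/max
// range. The range is widened to include 0 so that zero is exactly
// representable, which the zero-point encoding requires.
std::pair<float, int> ComputeAsymm8Scheme(float min, float max)
{
    min = std::min(0.0f, min);
    max = std::max(0.0f, max);
    const float scale = (max - min) / 255.0f;
    if (scale == 0.0f) { return { 1.0f, 0 }; } // degenerate range guard
    const int offset = static_cast<int>(std::lround(-min / scale));
    return { scale, std::min(255, std::max(0, offset)) };
}

With the default static test range [-15.0f, 15.0f] this yields scale 30/255 and offset 128, matching the expectations in the QuantizerTest.cpp changes below.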
diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp
index 6e5609df02..6463350f2b 100644
--- a/src/armnn/QuantizerVisitor.hpp
+++ b/src/armnn/QuantizerVisitor.hpp
@@ -139,6 +139,12 @@ private:
/// Record the guids so we can easily find the layers later
void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer);
+ /// Sets the bias quantization scale based on input and weight scales
+ ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ std::vector<int32_t>& weightsBacking);
+
/// Reference to the static range visitor used to retrieve the quantization ranges
const RangeTracker& m_Ranges;
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp
index 337c61585f..213018ab85 100644
--- a/src/armnn/test/QuantizerTest.cpp
+++ b/src/armnn/test/QuantizerTest.cpp
@@ -86,11 +86,54 @@ protected:
}
}
- void TestConstantQuantizationParams(const TensorInfo& info, const OffsetScalePair& params)
+ void TestConstantQuantizationParams(const TensorInfo& info,
+ const OffsetScalePair& params,
+ DataType dataType = DataType::QuantisedAsymm8)
{
TestQuantizationParamsImpl(info, DataType::QuantisedAsymm8, params.first, params.second);
}
+ void TestBiasQuantizationParams(const TensorInfo& info,
+ const OffsetScalePair& qAsymm8Params,
+ const OffsetScalePair& qSymm16Params,
+ DataType dataType = DataType::QuantisedAsymm8)
+ {
+ switch (m_QuantizerOptions.m_ActivationFormat)
+ {
+ case DataType::QuantisedAsymm8:
+ TestQuantizationParamsImpl(info, dataType, qAsymm8Params.first, qAsymm8Params.second);
+ break;
+ case DataType::QuantisedSymm16:
+ TestQuantizationParamsImpl(info, dataType, qSymm16Params.first, qSymm16Params.second);
+ break;
+ default:
+ throw InvalidArgumentException("Unsupported quantization target");
+ }
+ }
+
+ void TestQuantizationOnLayersWithBiases(const IConnectableLayer* layer,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases)
+ {
+ TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
+ float inputScaleQAsymm8 = 30.0f / g_Asymm8QuantizationBase;
+ float inputScaleQSymm16 = 15.0f / g_Symm16QuantizationBase;
+ float weightsScale = 3.0f / g_Asymm8QuantizationBase;
+
+ // Based off default static range [-15.0f, 15.0f]
+ TestQuantizationParams(info, {inputScaleQAsymm8, 128}, {inputScaleQSymm16, 0});
+
+ TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85});
+
+ if (biases.has_value())
+ {
+ TestBiasQuantizationParams(biases.value().GetInfo(),
+ {inputScaleQAsymm8 * weightsScale, 0},
+ {inputScaleQSymm16 * weightsScale, 0},
+ DataType::Signed32);
+ }
+ }
+
TensorShape m_InputShape;
TensorShape m_OutputShape;
@@ -726,18 +769,7 @@ void ValidateFullyConnectedLayer(const bool biasEnabled)
const Optional<ConstTensor>& biases,
const char* name = nullptr) override
{
- TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
- // Based off default static range [-15.0f, 15.0f]
- TestQuantizationParams(
- info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
- TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
- if (biases.has_value())
- {
- TestConstantQuantizationParams(biases.value().GetInfo(), {30.0f / g_Asymm8QuantizationBase, 0});
- }
+ TestQuantizationOnLayersWithBiases(layer, weights, biases);
}
};
@@ -783,18 +815,7 @@ void TestQuantizeConvolution2d(bool useBiases)
const Optional<ConstTensor>& biases,
const char *name = nullptr) override
{
- TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
- // Based off default static range [-15.0f, 15.0f]
- TestQuantizationParams(
- info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
- TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
- if (biases.has_value())
- {
- TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
- }
+ TestQuantizationOnLayersWithBiases(layer, weights, biases);
}
};
@@ -869,18 +890,7 @@ void TestQuantizeDepthwiseConvolution2d(bool useBiases)
const Optional<ConstTensor>& biases,
const char *name = nullptr) override
{
- TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
-
- // Based off default static range [-15.0f, 15.0f]
- TestQuantizationParams(
- info, {30.0f / g_Asymm8QuantizationBase, 128}, {15.0f / g_Symm16QuantizationBase, 0});
-
- TestConstantQuantizationParams(weights.GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
-
- if (biases.has_value())
- {
- TestConstantQuantizationParams(biases.value().GetInfo(), {3.0f / g_Asymm8QuantizationBase, 85});
- }
+ TestQuantizationOnLayersWithBiases(layer, weights, biases);
}
};
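As a quick numeric check of the expectations encoded in TestQuantizationOnLayersWithBiases: assuming the usual test constants g_Asymm8QuantizationBase = 255.0f and g_Symm16QuantizationBase = 32767.0f, the expected bias scales are simply the products of the input and weight scales:

#include <cassert>
#include <cmath>

int main()
{
    const float asymm8Base = 255.0f;   // assumed value of g_Asymm8QuantizationBase
    const float symm16Base = 32767.0f; // assumed value of g_Symm16QuantizationBase

    // Scales used by the fixtures, based off the default static range [-15.0f, 15.0f].
    const float inputScaleQAsymm8 = 30.0f / asymm8Base; // ~0.11765
    const float inputScaleQSymm16 = 15.0f / symm16Base; // ~0.00045778
    const float weightsScale      = 3.0f  / asymm8Base; // ~0.011765

    // Expected bias quantization scales (zero point 0 in both schemes).
    assert(std::abs(inputScaleQAsymm8 * weightsScale - 1.38408e-3f) < 1e-7f);
    assert(std::abs(inputScaleQSymm16 * weightsScale - 5.38562e-6f) < 1e-9f);
    return 0;
}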