about summary refs log tree commit diff
path: root/src/armnn/QuantizerVisitor.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/armnn/QuantizerVisitor.cpp')
-rw-r--r--  src/armnn/QuantizerVisitor.cpp  59
1 file changed, 53 insertions, 6 deletions
diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp
index 47ddc4ed29..7ba56757c2 100644
--- a/src/armnn/QuantizerVisitor.cpp
+++ b/src/armnn/QuantizerVisitor.cpp
@@ -24,12 +24,15 @@ QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker,
void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
IConnectableLayer* quantizedLayer)
{
+ BOOST_ASSERT(srcLayer);
for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
{
const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
const InputSlot* inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+ BOOST_ASSERT(inputSlot);
const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+ BOOST_ASSERT(outputSlot);
unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
Layer& layerToFind = outputSlot->GetOwningLayer();
@@ -60,6 +63,50 @@ void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* src
}
}
+// Quantizes a float32 bias tensor to Signed32. The bias scale is computed as
+// (input scale) * (weight scale): the input scale comes from the range recorded
+// for the output slot feeding this layer's input slot 0, the weight scale from
+// the already-quantized weights tensor. Offset is fixed at 0.
+// @param srcLayer  Original (float) layer whose input connection supplies the input scale.
+// @param weights   Already-quantized weights; GetQuantizationScale() feeds the bias scale.
+// @param biases    Original float32 biases; callers only invoke this when has_value() is true.
+// @param backing   Caller-owned buffer for the int32 values; the returned ConstTensor
+//                  references it, so it must outlive the returned tensor.
+// @return Signed32 ConstTensor backed by 'backing'; on traversal error, returns the
+//         original (unquantized) bias after asserting.
+ConstTensor QuantizerVisitor::CreateQuantizedBias(const IConnectableLayer* srcLayer,
+ const ConstTensor& weights,
+ const Optional<ConstTensor>& biases,
+ std::vector<int32_t>& backing)
+{
+ BOOST_ASSERT(srcLayer);
+ // Walk back through input slot 0 to the producing layer; its recorded
+ // output range determines the input quantization scale.
+ const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
+ auto inputSlot = boost::polymorphic_downcast<const InputSlot*>(&srcInputSlot);
+ BOOST_ASSERT(inputSlot);
+ const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+ BOOST_ASSERT(outputSlot);
+ unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+ Layer& layerToFind = outputSlot->GetOwningLayer();
+
+ // The producer must already have been visited (and hence recorded) for its
+ // range to be available; a miss here means the traversal order is wrong.
+ auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+ if (found == m_OriginalToQuantizedGuidMap.end())
+ {
+ // Error in graph traversal order
+ BOOST_ASSERT_MSG(false, "Error in graph traversal");
+ return biases.value();
+ }
+
+ // Fetch the min/max ranges that were computed earlier
+ auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+ OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+
+ // Get the quantization scale based on input and weight scale
+ float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
+
+ // Set up quantized bias tensor info and allocate space
+ TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
+ backing.resize(biases.value().GetInfo().GetNumElements());
+
+ // Convert values to int32
+ for (size_t i = 0; i < backing.size(); ++i)
+ {
+ // NOTE(review): written as multiply-by-reciprocal; equivalent to fp32Value / scale.
+ // numeric_cast throws on overflow rather than silently wrapping.
+ float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
+ backing[i] = boost::numeric_cast<int32_t>(fp32Value * ( 1 / scale ));
+ }
+
+ return ConstTensor(qInfo, backing);
+}
+
void QuantizerVisitor::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
{
m_OriginalToQuantizedGuidMap[srcLayer->GetGuid()] = quantizedLayer->GetGuid();
@@ -151,11 +198,11 @@ void QuantizerVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer,
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
@@ -177,11 +224,11 @@ void QuantizerVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer*
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}
@@ -203,11 +250,11 @@ void QuantizerVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer,
std::vector<uint8_t> weightsBacking;
ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking);
Optional<ConstTensor> optionalQBiases;
- std::vector<uint8_t> biasesBacking;
+ std::vector<int32_t> biasesBacking;
if (biases.has_value())
{
- ConstTensor qBiases = CreateQuantizedConst(biases.value(), biasesBacking);
+ ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
optionalQBiases = Optional<ConstTensor>(qBiases);
}