Diffstat (limited to 'src/armnn/QuantizerStrategy.cpp')
-rw-r--r-- | src/armnn/QuantizerStrategy.cpp | 519 |
1 file changed, 519 insertions, 0 deletions
diff --git a/src/armnn/QuantizerStrategy.cpp b/src/armnn/QuantizerStrategy.cpp
new file mode 100644
index 0000000000..df20749072
--- /dev/null
+++ b/src/armnn/QuantizerStrategy.cpp
@@ -0,0 +1,519 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "QuantizerStrategy.hpp"
+#include "armnn/utility/PolymorphicDowncast.hpp"
+
+namespace armnn
+{
+
+QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker,
+                                     const IQuantizationScheme* quantizationScheme,
+                                     bool preserveType)
+    : m_Ranges(rangeTracker)
+    , m_QuantizedNetwork(INetwork::Create())
+    , m_QuantizationScheme(quantizationScheme)
+    , m_PreserveType(preserveType)
+{
+}
+
+void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
+                                                     IConnectableLayer* quantizedLayer)
+{
+    ARMNN_ASSERT(srcLayer);
+    for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
+    {
+        const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
+        const InputSlot* inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
+        ARMNN_ASSERT(inputSlot);
+        const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+        ARMNN_ASSERT(outputSlot);
+        unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+        Layer& layerToFind = outputSlot->GetOwningLayer();
+
+        auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+        if (found == m_OriginalToQuantizedGuidMap.end())
+        {
+            // Error in graph traversal order
+            ARMNN_ASSERT_MSG(false, "Error in graph traversal");
+            return;
+        }
+
+        // Connect the slots in the quantized model
+        IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second];
+        IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i);
+        IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx);
+        newOutputSlot.Connect(newInputSlot);
+        TensorInfo info(outputSlot->GetTensorInfo());
+
+        // Only try to set quantization params on tensors that can be quantized
+        if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean &&
+            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 &&
+            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64)
+        {
+            // Fetch the min/max ranges that were computed earlier
+            auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+            OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+            info.SetDataType(m_QuantizationScheme->GetDataType());
+            info.SetQuantizationOffset(qParams.second);
+            info.SetQuantizationScale(qParams.first);
+        }
+        newOutputSlot.SetTensorInfo(info);
+    }
+}
+
+ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer,
+                                                   const ConstTensor& weights,
+                                                   const Optional<ConstTensor>& biases,
+                                                   std::vector<int32_t>& backing)
+{
+    ARMNN_ASSERT(srcLayer);
+    const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
+    auto inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
+    ARMNN_ASSERT(inputSlot);
+    const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
+
+    ARMNN_ASSERT(outputSlot);
+    unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
+    Layer& layerToFind = outputSlot->GetOwningLayer();
+
+    auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
+    if (found == m_OriginalToQuantizedGuidMap.end())
+    {
+        // Error in graph traversal order
+        ARMNN_ASSERT_MSG(false, "Error in graph traversal");
+        return biases.value();
+    }
+
+    // Fetch the min/max ranges that were computed earlier
+    auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
+    OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
+
+    // Get the quantization scale based on input and weight scale
+    float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
+
+    // Set up quantized bias tensor info and allocate space
+    TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
+    backing.resize(biases.value().GetInfo().GetNumElements());
+
+    // Convert values to int32
+    for (size_t i = 0; i < backing.size(); ++i)
+    {
+        float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
+        backing[i] = armnn::numeric_cast<int32_t>(fp32Value * (1 / scale));
+    }
+
+    return ConstTensor(qInfo, backing);
+}
+
+void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
+{
+    m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid()));
+    m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer));
+}
+
+void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                                        const BaseDescriptor& descriptor,
+                                        const std::vector<armnn::ConstTensor>& constants,
+                                        const char* name,
+                                        const armnn::LayerBindingId id)
+{
+    IgnoreUnused(id);
+
+    IConnectableLayer* newLayer;
+
+    switch (layer->GetType())
+    {
+        case armnn::LayerType::Addition :
+        {
+            newLayer = m_QuantizedNetwork->AddAdditionLayer(name);
+            break;
+        }
+        case armnn::LayerType::Activation :
+        {
+            const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::ArgMinMax :
+        {
+            ArgMinMaxDescriptor argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::BatchNormalization :
+        {
+            BatchNormalizationDescriptor batchNormalizationDescriptor =
+                static_cast<const BatchNormalizationDescriptor&>(descriptor);
+            std::vector<uint8_t> meanBacking;
+            ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking);
+
+            std::vector<uint8_t> varianceBacking;
+            ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking);
+
+            std::vector<uint8_t> betaBacking;
+            ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking);
+
+            std::vector<uint8_t> gammaBacking;
+            ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking);
+
+            newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor,
+                                                                      qMean,
+                                                                      qVariance,
+                                                                      qBeta,
+                                                                      qGamma,
+                                                                      name);
+            break;
+        }
+        case armnn::LayerType::BatchToSpaceNd :
+        {
+            BatchToSpaceNdDescriptor batchToSpaceNdDescriptor =
+                static_cast<const BatchToSpaceNdDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Comparison :
+        {
+            ComparisonDescriptor comparisonDescriptor = static_cast<const ComparisonDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Concat :
+        {
+            OriginsDescriptor originsDescriptor = static_cast<const OriginsDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Constant :
+        {
+            std::vector<uint8_t> inputBacking;
+            ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking);
+
+            newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name);
+            break;
+        }
+        case armnn::LayerType::Convolution2d :
+        {
+            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
+                                                        armnn::Optional<ConstTensor>{} :
+                                                        armnn::Optional<ConstTensor>(constants[1]);
+
+            std::vector<uint8_t> weightsBacking;
+            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
+            Optional<ConstTensor> optionalQBiases;
+            std::vector<int32_t> biasesBacking;
+
+            if (biases.has_value())
+            {
+                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
+                optionalQBiases = Optional<ConstTensor>(qBiases);
+            }
+            Convolution2dDescriptor convolution2dDescriptor = static_cast<const Convolution2dDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor,
+                                                                 qWeights,
+                                                                 optionalQBiases,
+                                                                 name);
+            break;
+        }
+        case armnn::LayerType::DepthToSpace :
+        {
+            DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast<const DepthToSpaceDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::DepthwiseConvolution2d :
+        {
+            DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor =
+                static_cast<const DepthwiseConvolution2dDescriptor&>(descriptor);
+
+            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
+                                                        armnn::Optional<ConstTensor>{} :
+                                                        armnn::Optional<ConstTensor>(constants[1]);
+
+            std::vector<uint8_t> weightsBacking;
+            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
+            Optional<ConstTensor> optionalQBiases;
+            std::vector<int32_t> biasesBacking;
+
+            if (biases.has_value())
+            {
+                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
+                optionalQBiases = Optional<ConstTensor>(qBiases);
+            }
+
+            newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(
+                depthwiseConvolution2dDescriptor,
+                qWeights,
+                optionalQBiases,
+                name);
+            break;
+        }
+        case armnn::LayerType::ElementwiseUnary :
+        {
+            ElementwiseUnaryDescriptor elementwiseUnaryDescriptor =
+                static_cast<const ElementwiseUnaryDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Fill :
+        {
+            FillDescriptor fillDescriptor = static_cast<const FillDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::FullyConnected :
+        {
+            FullyConnectedDescriptor fullyConnectedDescriptor =
+                static_cast<const FullyConnectedDescriptor&>(descriptor);
+
+            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
+                                                        armnn::Optional<ConstTensor>{} :
+                                                        armnn::Optional<ConstTensor>(constants[1]);
+
+            std::vector<uint8_t> weightsBacking;
+            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
+            Optional<ConstTensor> optionalQBiases;
+            std::vector<int32_t> biasesBacking;
+
+            if (biases.has_value())
+            {
+                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
+                optionalQBiases = Optional<ConstTensor>(qBiases);
+            }
+
+            newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor,
+                                                                  qWeights,
+                                                                  optionalQBiases,
+                                                                  name);
+            break;
+        }
+        case armnn::LayerType::Input :
+        {
+            const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
+            IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name);
+
+            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
+            {
+                IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer();
+                inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0));
+                inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo());
+                RecordLayer(layer, quantizeLayer);
+                return;
+            }
+            else
+            {
+                RecordLayer(layer, inputLayer);
+                return;
+            }
+        }
+        case armnn::LayerType::InstanceNormalization :
+        {
+            InstanceNormalizationDescriptor instanceNormalizationDescriptor =
+                static_cast<const InstanceNormalizationDescriptor&>(descriptor);
+
+            newLayer =
+                m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::LogSoftmax :
+        {
+            LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast<const LogSoftmaxDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Mean :
+        {
+            MeanDescriptor meanDescriptor = static_cast<const MeanDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Multiplication :
+        {
+            newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name);
+            break;
+        }
+        case armnn::LayerType::Normalization :
+        {
+            NormalizationDescriptor normalizationDescriptor = static_cast<const NormalizationDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Output :
+        {
+            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
+            const DataType& dataType = info.GetDataType();
+            newLayer = m_QuantizedNetwork->AddOutputLayer(id, name);
+
+            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
+            {
+                IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer();
+                RecordLayer(layer, dequantizeLayer);
+                SetQuantizedInputConnections(layer, dequantizeLayer);
+                dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0));
+                dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info);
+                return;
+            }
+            else
+            {
+                break;
+            }
+        }
+        case armnn::LayerType::Pad :
+        {
+            PadDescriptor padDescriptor = static_cast<const PadDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Permute :
+        {
+            PermuteDescriptor permuteDescriptor = static_cast<const PermuteDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Pooling2d :
+        {
+            Pooling2dDescriptor pooling2dDescriptor = static_cast<const Pooling2dDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Prelu :
+        {
+            newLayer = m_QuantizedNetwork->AddPreluLayer(name);
+            break;
+        }
+        case armnn::LayerType::Reshape :
+        {
+            ReshapeDescriptor reshapeDescriptor = static_cast<const ReshapeDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Resize :
+        {
+            ResizeBilinearDescriptor resizeBilinearDescriptor =
+                static_cast<const ResizeBilinearDescriptor&>(descriptor);
+
+            ResizeDescriptor resizeDescriptor;
+            resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
+            resizeDescriptor.m_TargetWidth  = resizeBilinearDescriptor.m_TargetWidth;
+            resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight;
+            resizeDescriptor.m_DataLayout   = resizeBilinearDescriptor.m_DataLayout;
+
+            newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Slice :
+        {
+            SliceDescriptor sliceDescriptor = static_cast<const SliceDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Softmax :
+        {
+            SoftmaxDescriptor softmaxDescriptor = static_cast<const SoftmaxDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::SpaceToBatchNd :
+        {
+            SpaceToBatchNdDescriptor spaceToBatchNdDescriptor =
+                static_cast<const SpaceToBatchNdDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::SpaceToDepth :
+        {
+            SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast<const SpaceToDepthDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Splitter :
+        {
+            SplitterDescriptor splitterDescriptor = static_cast<const SplitterDescriptor&>(descriptor);
+            newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Stack :
+        {
+            StackDescriptor stackDescriptor = static_cast<const StackDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::StridedSlice :
+        {
+            StridedSliceDescriptor stridedSliceDescriptor = static_cast<const StridedSliceDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name);
+            break;
+        }
+        case armnn::LayerType::Subtraction :
+        {
+            newLayer = m_QuantizedNetwork->AddSubtractionLayer(name);
+            break;
+        }
+        case armnn::LayerType::TransposeConvolution2d :
+        {
+            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
+                                                        armnn::Optional<ConstTensor>{} :
+                                                        armnn::Optional<ConstTensor>(constants[1]);
+            // quantize weights
+            std::vector<uint8_t> weightsBacking;
+            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
+
+            // quantize biases
+            std::vector<int32_t> biasesBacking;
+            Optional<ConstTensor> optionalQBiases;
+            if (biases.has_value())
+            {
+                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
+                optionalQBiases = Optional<ConstTensor>(qBiases);
+            }
+
+            TransposeConvolution2dDescriptor transposeConvolution2dDescriptor =
+                static_cast<const TransposeConvolution2dDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor,
+                                                                          qWeights,
+                                                                          optionalQBiases,
+                                                                          name);
+            break;
+        }
+        case armnn::LayerType::Transpose :
+        {
+            TransposeDescriptor transposeDescriptor = static_cast<const TransposeDescriptor&>(descriptor);
+
+            newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name);
+            break;
+        }
+        default:
+        {
+            throw UnimplementedException("Unimplemented layer encountered");
+        }
+    }
+    RecordLayer(layer, newLayer);
+    SetQuantizedInputConnections(layer, newLayer);
+}
+
+} // namespace armnn
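
Note on the scheme interface used throughout this file: ComputeScheme(min, max) returns an OffsetScalePair whose first element is the scale and whose second is the zero-point offset, as the qParams reads in SetQuantizedInputConnections and CreateQuantizedBias show. Below is a minimal standalone sketch of the usual asymmetric 8-bit mapping such a scheme performs; the helper name, the clamping, and the rounding details are illustrative assumptions, not code from this patch.

#include <algorithm>
#include <cmath>
#include <iostream>
#include <utility>

// Hypothetical stand-in for an asymmetric 8-bit ComputeScheme(min, max).
// Returns {scale, offset}, matching how the patch reads qParams:
// real_value ~= scale * (quantized_value - offset).
std::pair<float, int> ComputeAsymmU8(float min, float max)
{
    // The representable range must bracket zero so that 0.0f maps
    // exactly onto one of the 256 integer values.
    min = std::min(min, 0.0f);
    max = std::max(max, 0.0f);
    const float scale = (max - min) / 255.0f;
    if (scale == 0.0f) { return { 1.0f, 0 }; }  // degenerate all-zero range
    const int offset = static_cast<int>(std::round(-min / scale));
    return { scale, std::min(255, std::max(0, offset)) };
}

int main()
{
    // An activation observed in [-1, 3]: scale = 4/255 ~= 0.0157, offset 64,
    // so quantized 64 represents real 0.0 exactly.
    const auto qParams = ComputeAsymmU8(-1.0f, 3.0f);
    std::cout << qParams.first << " " << qParams.second << "\n";
    return 0;
}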
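
CreateQuantizedBias above re-quantizes the fp32 bias to Signed32 with scale = inputScale * weightScale and offset 0. A self-contained worked example of that arithmetic follows; the helper is hypothetical and only the formula comes from the patch.

#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical helper mirroring the arithmetic in CreateQuantizedBias():
// the int32 bias tensor uses scale = inputScale * weightScale, offset 0.
std::vector<int32_t> QuantizeBias(const std::vector<float>& fp32Bias,
                                  float inputScale,
                                  float weightScale)
{
    const float scale = inputScale * weightScale;
    std::vector<int32_t> backing(fp32Bias.size());
    for (std::size_t i = 0; i < fp32Bias.size(); ++i)
    {
        // Same expression as the patch: value * (1 / scale), narrowed to int32.
        backing[i] = static_cast<int32_t>(fp32Bias[i] * (1 / scale));
    }
    return backing;
}

int main()
{
    // inputScale 0.5 and weightScale 0.25 give a bias scale of 0.125,
    // so 2.5f quantizes to 20 and -1.0f to -8.
    const auto q = QuantizeBias({ 2.5f, -1.0f }, 0.5f, 0.25f);
    return (q[0] == 20 && q[1] == -8) ? 0 : 1;
}

Matching the bias scale to the product of the input and weight scales is the standard convention for quantized inference: it lets a backend add the bias directly into the int32 accumulator of the uint8 multiply-accumulate, which is also why the offset is fixed at 0.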