Diffstat (limited to 'src/armnn/QuantizerStrategy.cpp')
-rw-r--r-- | src/armnn/QuantizerStrategy.cpp | 519 |
1 files changed, 0 insertions, 519 deletions
diff --git a/src/armnn/QuantizerStrategy.cpp b/src/armnn/QuantizerStrategy.cpp
deleted file mode 100644
index df20749072..0000000000
--- a/src/armnn/QuantizerStrategy.cpp
+++ /dev/null
@@ -1,519 +0,0 @@
-//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT
-//
-
-#include "QuantizerStrategy.hpp"
-#include "armnn/utility/PolymorphicDowncast.hpp"
-
-namespace armnn
-{
-
-QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker,
-                                     const IQuantizationScheme* quantizationScheme,
-                                     bool preserveType)
-    : m_Ranges(rangeTracker)
-    , m_QuantizedNetwork(INetwork::Create())
-    , m_QuantizationScheme(quantizationScheme)
-    , m_PreserveType(preserveType)
-{
-}
-
-void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
-                                                     IConnectableLayer* quantizedLayer)
-{
-    ARMNN_ASSERT(srcLayer);
-    for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
-    {
-        const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
-        const InputSlot* inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
-        ARMNN_ASSERT(inputSlot);
-        const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
-
-        ARMNN_ASSERT(outputSlot);
-        unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
-        Layer& layerToFind = outputSlot->GetOwningLayer();
-
-        auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
-        if (found == m_OriginalToQuantizedGuidMap.end())
-        {
-            // Error in graph traversal order
-            ARMNN_ASSERT_MSG(false, "Error in graph traversal");
-            return;
-        }
-
-        // Connect the slots in the quantized model
-        IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second];
-        IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i);
-        IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx);
-        newOutputSlot.Connect(newInputSlot);
-        TensorInfo info(outputSlot->GetTensorInfo());
-
-        // Only try to set quantization params on tensors that can be quantized
-        if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean &&
-            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 &&
-            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64)
-        {
-            // Fetch the min/max ranges that were computed earlier
-            auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
-            OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
-            info.SetDataType(m_QuantizationScheme->GetDataType());
-            info.SetQuantizationOffset(qParams.second);
-            info.SetQuantizationScale(qParams.first);
-        }
-        newOutputSlot.SetTensorInfo(info);
-    }
-}
-
-ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer,
-                                                   const ConstTensor& weights,
-                                                   const Optional<ConstTensor>& biases,
-                                                   std::vector<int32_t>& backing)
-{
-    ARMNN_ASSERT(srcLayer);
-    const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
-    auto inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
-    ARMNN_ASSERT(inputSlot);
-    const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
-
-    ARMNN_ASSERT(outputSlot);
-    unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
-    Layer& layerToFind = outputSlot->GetOwningLayer();
-
-    auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
-    if (found == m_OriginalToQuantizedGuidMap.end())
-    {
-        // Error in graph traversal order
-        ARMNN_ASSERT_MSG(false, "Error in graph traversal");
-        return biases.value();
-    }
-
-    // Fetch the min/max ranges that were computed earlier
-    auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
-    OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
-
-    // Get the quantization scale based on input and weight scale
-    float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
-
-    // Set up quantized bias tensor info and allocate space
-    TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
-    backing.resize(biases.value().GetInfo().GetNumElements());
-
-    // Convert values to int32
-    for (size_t i = 0; i < backing.size(); ++i)
-    {
-        float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
-        backing[i] = armnn::numeric_cast<int32_t>(fp32Value * (1 / scale));
-    }
-
-    return ConstTensor(qInfo, backing);
-}
-
-void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
-{
-    m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid()));
-    m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer));
-}
-
-void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
-                                        const BaseDescriptor& descriptor,
-                                        const std::vector<armnn::ConstTensor>& constants,
-                                        const char* name,
-                                        const armnn::LayerBindingId id)
-{
-    IgnoreUnused(id);
-
-    IConnectableLayer* newLayer;
-
-    switch (layer->GetType())
-    {
-        case armnn::LayerType::Addition :
-        {
-            newLayer = m_QuantizedNetwork->AddAdditionLayer(name);
-            break;
-        }
-        case armnn::LayerType::Activation :
-        {
-            const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::ArgMinMax :
-        {
-            ArgMinMaxDescriptor argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::BatchNormalization :
-        {
-            BatchNormalizationDescriptor batchNormalizationDescriptor =
-                static_cast<const BatchNormalizationDescriptor&>(descriptor);
-            std::vector<uint8_t> meanBacking;
-            ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking);
-
-            std::vector<uint8_t> varianceBacking;
-            ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking);
-
-            std::vector<uint8_t> betaBacking;
-            ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking);
-
-            std::vector<uint8_t> gammaBacking;
-            ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking);
-
-            newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor,
-                                                                      qMean,
-                                                                      qVariance,
-                                                                      qBeta,
-                                                                      qGamma,
-                                                                      name);
-            break;
-        }
-        case armnn::LayerType::BatchToSpaceNd :
-        {
-            BatchToSpaceNdDescriptor batchToSpaceNdDescriptor =
-                static_cast<const BatchToSpaceNdDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Comparison :
-        {
-            ComparisonDescriptor comparisonDescriptor = static_cast<const ComparisonDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Concat :
-        {
-            OriginsDescriptor originsDescriptor = static_cast<const OriginsDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Constant :
-        {
-            std::vector<uint8_t> inputBacking;
-            ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking);
-
-            newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name);
-            break;
-        }
-        case armnn::LayerType::Convolution2d :
-        {
-            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
-                                                        armnn::Optional<ConstTensor>{} :
-                                                        armnn::Optional<ConstTensor>(constants[1]);
-
-            std::vector<uint8_t> weightsBacking;
-            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
-            Optional<ConstTensor> optionalQBiases;
-            std::vector<int32_t> biasesBacking;
-
-            if (biases.has_value())
-            {
-                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
-                optionalQBiases = Optional<ConstTensor>(qBiases);
-            }
-            Convolution2dDescriptor convolution2dDescriptor = static_cast<const Convolution2dDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor,
-                                                                 qWeights,
-                                                                 optionalQBiases,
-                                                                 name);
-            break;
-        }
-        case armnn::LayerType::DepthToSpace :
-        {
-            DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast<const DepthToSpaceDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::DepthwiseConvolution2d :
-        {
-            DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor =
-                static_cast<const DepthwiseConvolution2dDescriptor&>(descriptor);
-
-            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
-                                                        armnn::Optional<ConstTensor>{} :
-                                                        armnn::Optional<ConstTensor>(constants[1]);
-
-            std::vector<uint8_t> weightsBacking;
-            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
-            Optional<ConstTensor> optionalQBiases;
-            std::vector<int32_t> biasesBacking;
-
-            if (biases.has_value())
-            {
-                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
-                optionalQBiases = Optional<ConstTensor>(qBiases);
-            }
-
-            newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(
-                depthwiseConvolution2dDescriptor,
-                qWeights,
-                optionalQBiases,
-                name);
-            break;
-        }
-        case armnn::LayerType::ElementwiseUnary :
-        {
-            ElementwiseUnaryDescriptor elementwiseUnaryDescriptor =
-                static_cast<const ElementwiseUnaryDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Fill :
-        {
-            FillDescriptor fillDescriptor = static_cast<const FillDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::FullyConnected :
-        {
-            FullyConnectedDescriptor fullyConnectedDescriptor =
-                static_cast<const FullyConnectedDescriptor&>(descriptor);
-
-            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
-                                                        armnn::Optional<ConstTensor>{} :
-                                                        armnn::Optional<ConstTensor>(constants[1]);
-
-            std::vector<uint8_t> weightsBacking;
-            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
-            Optional<ConstTensor> optionalQBiases;
-            std::vector<int32_t> biasesBacking;
-
-            if (biases.has_value())
-            {
-                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
-                optionalQBiases = Optional<ConstTensor>(qBiases);
-            }
-
-            newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor,
-                                                                  qWeights,
-                                                                  optionalQBiases,
-                                                                  name);
-            break;
-        }
-        case armnn::LayerType::Input :
-        {
-            const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
-            IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name);
-
-            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
-            {
-                IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer();
-                inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0));
-                inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo());
-                RecordLayer(layer, quantizeLayer);
-                return;
-            }
-            else
-            {
-                RecordLayer(layer, inputLayer);
-                return;
-            }
-        }
-        case armnn::LayerType::InstanceNormalization :
-        {
-            InstanceNormalizationDescriptor instanceNormalizationDescriptor =
-                static_cast<const InstanceNormalizationDescriptor&>(descriptor);
-
-            newLayer =
-                m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::LogSoftmax :
-        {
-            LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast<const LogSoftmaxDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Mean :
-        {
-            MeanDescriptor meanDescriptor = static_cast<const MeanDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Multiplication :
-        {
-            newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name);
-            break;
-        }
-        case armnn::LayerType::Normalization :
-        {
-            NormalizationDescriptor normalizationDescriptor = static_cast<const NormalizationDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Output :
-        {
-            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
-            const DataType& dataType = info.GetDataType();
-            newLayer = m_QuantizedNetwork->AddOutputLayer(id, name);
-
-            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
-            {
-                IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer();
-                RecordLayer(layer, dequantizeLayer);
-                SetQuantizedInputConnections(layer, dequantizeLayer);
-                dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0));
-                dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info);
-                return;
-            }
-            else
-            {
-                break;
-            }
-        }
-        case armnn::LayerType::Pad :
-        {
-            PadDescriptor padDescriptor = static_cast<const PadDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Permute :
-        {
-            PermuteDescriptor permuteDescriptor = static_cast<const PermuteDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Pooling2d :
-        {
-            Pooling2dDescriptor pooling2dDescriptor = static_cast<const Pooling2dDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Prelu :
-        {
-            newLayer = m_QuantizedNetwork->AddPreluLayer(name);
-            break;
-        }
-        case armnn::LayerType::Reshape :
-        {
-            ReshapeDescriptor reshapeDescriptor = static_cast<const ReshapeDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Resize :
-        {
-            ResizeBilinearDescriptor resizeBilinearDescriptor =
-                static_cast<const ResizeBilinearDescriptor&>(descriptor);
-
-            ResizeDescriptor resizeDescriptor;
-            resizeDescriptor.m_Method = ResizeMethod::Bilinear;
-            resizeDescriptor.m_TargetWidth = resizeBilinearDescriptor.m_TargetWidth;
-            resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight;
-            resizeDescriptor.m_DataLayout = resizeBilinearDescriptor.m_DataLayout;
-
-            newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Slice :
-        {
-            SliceDescriptor sliceDescriptor = static_cast<const SliceDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Softmax :
-        {
-            SoftmaxDescriptor softmaxDescriptor = static_cast<const SoftmaxDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::SpaceToBatchNd :
-        {
-            SpaceToBatchNdDescriptor spaceToBatchNdDescriptor =
-                static_cast<const SpaceToBatchNdDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::SpaceToDepth :
-        {
-            SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast<const SpaceToDepthDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Splitter :
-        {
-            SplitterDescriptor splitterDescriptor = static_cast<const SplitterDescriptor&>(descriptor);
-            newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Stack :
-        {
-            StackDescriptor stackDescriptor = static_cast<const StackDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::StridedSlice :
-        {
-            StridedSliceDescriptor stridedSliceDescriptor = static_cast<const StridedSliceDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name);
-            break;
-        }
-        case armnn::LayerType::Subtraction :
-        {
-            newLayer = m_QuantizedNetwork->AddSubtractionLayer(name);
-            break;
-        }
-        case armnn::LayerType::TransposeConvolution2d :
-        {
-            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
-                                                        armnn::Optional<ConstTensor>{} :
-                                                        armnn::Optional<ConstTensor>(constants[1]);
-            // quantize weights
-            std::vector<uint8_t> weightsBacking;
-            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
-
-            // quantize biases
-            std::vector<int32_t> biasesBacking;
-            Optional<ConstTensor> optionalQBiases;
-            if (biases.has_value())
-            {
-                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
-                optionalQBiases = Optional<ConstTensor>(qBiases);
-            }
-
-            TransposeConvolution2dDescriptor transposeConvolution2dDescriptor =
-                static_cast<const TransposeConvolution2dDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor,
-                                                                          qWeights,
-                                                                          optionalQBiases,
-                                                                          name);
-            break;
-        }
-        case armnn::LayerType::Transpose :
-        {
-            TransposeDescriptor transposeDescriptor = static_cast<const TransposeDescriptor&>(descriptor);
-
-            newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name);
-            break;
-        }
-        default:
-        {
-            throw UnimplementedException("Unimplemented layer encountered");
-        }
-    }
-    RecordLayer(layer, newLayer);
-    SetQuantizedInputConnections(layer, newLayer);
-}
-
-}
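
Two pieces of arithmetic in the removed file are worth restating in isolation. First, SetQuantizedInputConnections turns a recorded min/max range into an OffsetScalePair via IQuantizationScheme::ComputeScheme. The stand-alone sketch below illustrates an 8-bit asymmetric scheme in that spirit; the function name ComputeAsymmetricQParams, the zero-widening step, and the degenerate-range guard are assumptions for illustration, not ArmNN API.

    #include <algorithm>
    #include <cmath>
    #include <utility>

    // Illustrative 8-bit asymmetric quantization in the spirit of
    // IQuantizationScheme::ComputeScheme: map the observed range [min, max]
    // onto [0, 255]. The pair is ordered (scale, offset), matching how the
    // deleted code reads OffsetScalePair (qParams.first is the scale).
    std::pair<float, int> ComputeAsymmetricQParams(float min, float max)
    {
        min = std::min(min, 0.0f); // widen so 0.0f is exactly representable
        max = std::max(max, 0.0f);
        float scale = (max - min) / 255.0f;
        if (scale == 0.0f)
        {
            scale = 1.0f;          // degenerate range: avoid division by zero
        }
        const int offset = static_cast<int>(std::lround(-min / scale));
        return { scale, offset };
    }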
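
Second, CreateQuantizedBias derives the bias scale as the product of the input scale and the weight scale, then rescales each FP32 bias into a Signed32 backing store. A minimal sketch of that arithmetic follows; QuantizeBiasValues is a hypothetical free function, not an ArmNN symbol.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // Mirrors the loop in CreateQuantizedBias: biasScale = inputScale *
    // weightScale, and each FP32 value is divided by it. Like the deleted
    // code (armnn::numeric_cast of fp32Value * (1 / scale)), this truncates
    // toward zero rather than rounding to nearest.
    std::vector<int32_t> QuantizeBiasValues(const std::vector<float>& fp32Biases,
                                            float inputScale,
                                            float weightScale)
    {
        const float biasScale = inputScale * weightScale;
        std::vector<int32_t> backing(fp32Biases.size());
        for (size_t i = 0; i < fp32Biases.size(); ++i)
        {
            backing[i] = static_cast<int32_t>(fp32Biases[i] / biasScale);
        }
        return backing;
    }

Storing biases at inputScale * weightScale lets an int32 bias be added directly to the int32 accumulator of a quantized convolution or fully connected layer, which is why the tensor info above uses DataType::Signed32 with offset 0.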