//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "QuantizerStrategy.hpp"
#include "armnn/utility/PolymorphicDowncast.hpp"

namespace armnn
{

QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker,
                                     const IQuantizationScheme* quantizationScheme,
                                     bool preserveType)
    : m_Ranges(rangeTracker)
    , m_QuantizedNetwork(INetwork::Create())
    , m_QuantizationScheme(quantizationScheme)
    , m_PreserveType(preserveType)
{}

void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer,
                                                     IConnectableLayer* quantizedLayer)
{
    ARMNN_ASSERT(srcLayer);
    for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++)
    {
        const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i);
        const InputSlot* inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
        ARMNN_ASSERT(inputSlot);
        const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();

        ARMNN_ASSERT(outputSlot);
        unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
        Layer& layerToFind = outputSlot->GetOwningLayer();

        auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
        if (found == m_OriginalToQuantizedGuidMap.end())
        {
            // Error in graph traversal order
            ARMNN_ASSERT_MSG(false, "Error in graph traversal");
            return;
        }

        // Connect the slots in the quantized model
        IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second];
        IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i);
        IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx);
        newOutputSlot.Connect(newInputSlot);
        TensorInfo info(outputSlot->GetTensorInfo());

        // Only try to set quantization params on tensors that can be quantized
        if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean &&
            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 &&
            inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64)
        {
            // Fetch the min/max ranges that were computed earlier
            auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
            OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);
            info.SetDataType(m_QuantizationScheme->GetDataType());
            info.SetQuantizationOffset(qParams.second);
            info.SetQuantizationScale(qParams.first);
        }
        newOutputSlot.SetTensorInfo(info);
    }
}

ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer,
                                                   const ConstTensor& weights,
                                                   const Optional<ConstTensor>& biases,
                                                   std::vector<int32_t>& backing)
{
    ARMNN_ASSERT(srcLayer);
    const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0);
    auto inputSlot = static_cast<const InputSlot*>(&srcInputSlot);
    ARMNN_ASSERT(inputSlot);
    const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();

    ARMNN_ASSERT(outputSlot);
    unsigned int slotIdx = outputSlot->CalculateIndexOnOwner();
    Layer& layerToFind = outputSlot->GetOwningLayer();

    auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid());
    if (found == m_OriginalToQuantizedGuidMap.end())
    {
        // Error in graph traversal order
        ARMNN_ASSERT_MSG(false, "Error in graph traversal");
        return biases.value();
    }

    // Fetch the min/max ranges that were computed earlier
    auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx);
    OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second);

    // Get the quantization scale based on input and weight scale
    float scale = qParams.first * weights.GetInfo().GetQuantizationScale();
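    // Why this rescaling rule: in integer kernels the bias is added to the int32
    // accumulator of (input x weights), so it must use that accumulator's scale,
    // i.e. bias_scale = input_scale * weight_scale, with a zero offset. As an
    // illustrative example (hypothetical values): input_scale = 0.05f and
    // weight_scale = 0.1f give bias_scale = 0.005f, so a float bias of 1.25f
    // becomes 1.25f / 0.005f = 250 in the Signed32 tensor set up below.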
    // Set up quantized bias tensor info and allocate space
    TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0);
    backing.resize(biases.value().GetInfo().GetNumElements());

    // Convert values to int32
    for (size_t i = 0; i < backing.size(); ++i)
    {
        float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i];
        backing[i] = armnn::numeric_cast<int32_t>(fp32Value * (1 / scale));
    }

    return ConstTensor(qInfo, backing);
}

void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer)
{
    m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid()));
    m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer));
}

void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer,
                                        const BaseDescriptor& descriptor,
                                        const std::vector<armnn::ConstTensor>& constants,
                                        const char* name,
                                        const armnn::LayerBindingId id)
{
    IgnoreUnused(id);

    IConnectableLayer* newLayer;

    switch (layer->GetType())
    {
        case armnn::LayerType::Addition :
        {
            newLayer = m_QuantizedNetwork->AddAdditionLayer(name);
            break;
        }
        case armnn::LayerType::Activation :
        {
            const ActivationDescriptor& activationDescriptor =
                    static_cast<const ActivationDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name);
            break;
        }
        case armnn::LayerType::ArgMinMax :
        {
            ArgMinMaxDescriptor argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name);
            break;
        }
        case armnn::LayerType::BatchNormalization :
        {
            BatchNormalizationDescriptor batchNormalizationDescriptor =
                    static_cast<const BatchNormalizationDescriptor&>(descriptor);
            std::vector<uint8_t> meanBacking;
            ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking);

            std::vector<uint8_t> varianceBacking;
            ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking);

            std::vector<uint8_t> betaBacking;
            ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking);

            std::vector<uint8_t> gammaBacking;
            ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking);

            newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor,
                                                                      qMean,
                                                                      qVariance,
                                                                      qBeta,
                                                                      qGamma,
                                                                      name);
            break;
        }
        case armnn::LayerType::BatchToSpaceNd :
        {
            BatchToSpaceNdDescriptor batchToSpaceNdDescriptor =
                    static_cast<const BatchToSpaceNdDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name);
            break;
        }
        case armnn::LayerType::Comparison :
        {
            ComparisonDescriptor comparisonDescriptor = static_cast<const ComparisonDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name);
            break;
        }
        case armnn::LayerType::Concat :
        {
            OriginsDescriptor originsDescriptor = static_cast<const OriginsDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name);
            break;
        }
        case armnn::LayerType::Constant :
        {
            std::vector<uint8_t> inputBacking;
            ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking);

            newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name);
            break;
        }
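        // For the weighted layers handled below (Convolution2d, DepthwiseConvolution2d,
        // FullyConnected and TransposeConvolution2d) the strategy is invoked with
        // constants[0] holding the weights and, when present, constants[1] holding
        // the bias; constants.size() == 1 therefore means "no bias". Weights are
        // quantized via CreateQuantizedConst and biases via CreateQuantizedBias above.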
        case armnn::LayerType::Convolution2d :
        {
            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
                                                        armnn::Optional<ConstTensor>{} :
                                                        armnn::Optional<ConstTensor>(constants[1]);
            std::vector<uint8_t> weightsBacking;
            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
            Optional<ConstTensor> optionalQBiases;
            std::vector<int32_t> biasesBacking;

            if (biases.has_value())
            {
                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
                optionalQBiases = Optional<ConstTensor>(qBiases);
            }
            Convolution2dDescriptor convolution2dDescriptor =
                    static_cast<const Convolution2dDescriptor&>(descriptor);

            newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor,
                                                                 qWeights,
                                                                 optionalQBiases,
                                                                 name);
            break;
        }
        case armnn::LayerType::DepthToSpace :
        {
            DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast<const DepthToSpaceDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name);
            break;
        }
        case armnn::LayerType::DepthwiseConvolution2d :
        {
            DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor =
                    static_cast<const DepthwiseConvolution2dDescriptor&>(descriptor);
            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
                                                        armnn::Optional<ConstTensor>{} :
                                                        armnn::Optional<ConstTensor>(constants[1]);
            std::vector<uint8_t> weightsBacking;
            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
            Optional<ConstTensor> optionalQBiases;
            std::vector<int32_t> biasesBacking;

            if (biases.has_value())
            {
                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
                optionalQBiases = Optional<ConstTensor>(qBiases);
            }

            newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(depthwiseConvolution2dDescriptor,
                                                                          qWeights,
                                                                          optionalQBiases,
                                                                          name);
            break;
        }
        case armnn::LayerType::ElementwiseUnary :
        {
            ElementwiseUnaryDescriptor elementwiseUnaryDescriptor =
                    static_cast<const ElementwiseUnaryDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name);
            break;
        }
        case armnn::LayerType::Fill :
        {
            FillDescriptor fillDescriptor = static_cast<const FillDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name);
            break;
        }
        case armnn::LayerType::FullyConnected :
        {
            FullyConnectedDescriptor fullyConnectedDescriptor =
                    static_cast<const FullyConnectedDescriptor&>(descriptor);
            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
                                                        armnn::Optional<ConstTensor>{} :
                                                        armnn::Optional<ConstTensor>(constants[1]);
            std::vector<uint8_t> weightsBacking;
            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);
            Optional<ConstTensor> optionalQBiases;
            std::vector<int32_t> biasesBacking;

            if (biases.has_value())
            {
                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
                optionalQBiases = Optional<ConstTensor>(qBiases);
            }

            newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor,
                                                                  qWeights,
                                                                  optionalQBiases,
                                                                  name);
            break;
        }
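        // The Input case below and the matching Output case further down implement
        // m_PreserveType: when the caller asks to keep the original float interface,
        // a Quantize layer is appended after each Float32/Float16 input and a
        // Dequantize layer is inserted before each float output, so only the
        // interior of the network switches to the quantized data type.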
        case armnn::LayerType::Input :
        {
            const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType();
            IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name);

            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
            {
                IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer();
                inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0));
                inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo());
                RecordLayer(layer, quantizeLayer);
                return;
            }
            else
            {
                RecordLayer(layer, inputLayer);
                return;
            }
        }
        case armnn::LayerType::InstanceNormalization :
        {
            InstanceNormalizationDescriptor instanceNormalizationDescriptor =
                    static_cast<const InstanceNormalizationDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name);
            break;
        }
        case armnn::LayerType::LogSoftmax :
        {
            LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast<const LogSoftmaxDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name);
            break;
        }
        case armnn::LayerType::Mean :
        {
            MeanDescriptor meanDescriptor = static_cast<const MeanDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name);
            break;
        }
        case armnn::LayerType::Multiplication :
        {
            newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name);
            break;
        }
        case armnn::LayerType::Normalization :
        {
            NormalizationDescriptor normalizationDescriptor =
                    static_cast<const NormalizationDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name);
            break;
        }
        case armnn::LayerType::Output :
        {
            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
            const DataType& dataType = info.GetDataType();
            newLayer = m_QuantizedNetwork->AddOutputLayer(id, name);

            if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16))
            {
                IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer();
                RecordLayer(layer, dequantizeLayer);
                SetQuantizedInputConnections(layer, dequantizeLayer);
                dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0));
                dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info);
                return;
            }
            else
            {
                break;
            }
        }
        case armnn::LayerType::Pad :
        {
            PadDescriptor padDescriptor = static_cast<const PadDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name);
            break;
        }
        case armnn::LayerType::Permute :
        {
            PermuteDescriptor permuteDescriptor = static_cast<const PermuteDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name);
            break;
        }
        case armnn::LayerType::Pooling2d :
        {
            Pooling2dDescriptor pooling2dDescriptor = static_cast<const Pooling2dDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name);
            break;
        }
        case armnn::LayerType::Prelu :
        {
            newLayer = m_QuantizedNetwork->AddPreluLayer(name);
            break;
        }
        case armnn::LayerType::Reshape :
        {
            ReshapeDescriptor reshapeDescriptor = static_cast<const ReshapeDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name);
            break;
        }
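        // The Resize case below re-expresses the layer's (deprecated)
        // ResizeBilinearDescriptor as the generic ResizeDescriptor with
        // m_Method = ResizeMethod::Bilinear, copying the target width/height and
        // data layout across, so the quantized network only ever contains the
        // newer Resize layer variant.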
        case armnn::LayerType::Resize :
        {
            ResizeBilinearDescriptor resizeBilinearDescriptor =
                    static_cast<const ResizeBilinearDescriptor&>(descriptor);

            ResizeDescriptor resizeDescriptor;
            resizeDescriptor.m_Method       = ResizeMethod::Bilinear;
            resizeDescriptor.m_TargetWidth  = resizeBilinearDescriptor.m_TargetWidth;
            resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight;
            resizeDescriptor.m_DataLayout   = resizeBilinearDescriptor.m_DataLayout;

            newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name);
            break;
        }
        case armnn::LayerType::Slice :
        {
            SliceDescriptor sliceDescriptor = static_cast<const SliceDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name);
            break;
        }
        case armnn::LayerType::Softmax :
        {
            SoftmaxDescriptor softmaxDescriptor = static_cast<const SoftmaxDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name);
            break;
        }
        case armnn::LayerType::SpaceToBatchNd :
        {
            SpaceToBatchNdDescriptor spaceToBatchNdDescriptor =
                    static_cast<const SpaceToBatchNdDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name);
            break;
        }
        case armnn::LayerType::SpaceToDepth :
        {
            SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast<const SpaceToDepthDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name);
            break;
        }
        case armnn::LayerType::Splitter :
        {
            SplitterDescriptor splitterDescriptor = static_cast<const SplitterDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name);
            break;
        }
        case armnn::LayerType::Stack :
        {
            StackDescriptor stackDescriptor = static_cast<const StackDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name);
            break;
        }
        case armnn::LayerType::StridedSlice :
        {
            StridedSliceDescriptor stridedSliceDescriptor = static_cast<const StridedSliceDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name);
            break;
        }
        case armnn::LayerType::Subtraction :
        {
            newLayer = m_QuantizedNetwork->AddSubtractionLayer(name);
            break;
        }
        case armnn::LayerType::TransposeConvolution2d :
        {
            const armnn::Optional<ConstTensor> biases = constants.size() == 1 ?
                                                        armnn::Optional<ConstTensor>{} :
                                                        armnn::Optional<ConstTensor>(constants[1]);
            // Quantize weights
            std::vector<uint8_t> weightsBacking;
            ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking);

            // Quantize biases
            std::vector<int32_t> biasesBacking;
            Optional<ConstTensor> optionalQBiases;
            if (biases.has_value())
            {
                ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking);
                optionalQBiases = Optional<ConstTensor>(qBiases);
            }
            TransposeConvolution2dDescriptor transposeConvolution2dDescriptor =
                    static_cast<const TransposeConvolution2dDescriptor&>(descriptor);

            newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor,
                                                                          qWeights,
                                                                          optionalQBiases,
                                                                          name);
            break;
        }
        case armnn::LayerType::Transpose :
        {
            TransposeDescriptor transposeDescriptor = static_cast<const TransposeDescriptor&>(descriptor);
            newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name);
            break;
        }
        default:
        {
            throw UnimplementedException("Unimplemented layer encountered");
        }
    }
    RecordLayer(layer, newLayer);
    SetQuantizedInputConnections(layer, newLayer);
}

} // namespace armnn
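// ---------------------------------------------------------------------------
// Minimal usage sketch (illustrative comment only, not compiled code). The
// scheme class name and the driver/accessor calls below are assumptions about
// the surrounding quantizer components, not guarantees of this file's API:
//
//     RangeTracker ranges;                 // populated by a prior range-collection pass
//     QAsymmU8QuantizationScheme scheme;   // an IQuantizationScheme implementation (assumed)
//     QuantizerStrategy strategy(ranges, &scheme, /*preserveType=*/false);
//
//     // Layers must be visited in topological order so that every predecessor
//     // is already present in m_OriginalToQuantizedGuidMap when its consumers
//     // call SetQuantizedInputConnections().
//     ApplyStrategyToLayers(graph.TopologicalSort(), strategy);   // hypothetical driver
//
//     INetworkPtr quantized = strategy.RetrieveFinalNetwork();    // accessor assumed from header
// ---------------------------------------------------------------------------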