// // Copyright © 2021 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "DynamicQuantizationStrategy.hpp" #include "NetworkUtils.hpp" #include #include #include #include #include namespace armnn { DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph) : m_RangeTracker(rangeTracker), m_Graph(graph) {} void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) { m_RangeTracker.SetRange(layer, outputIdx, min, max); } void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer) { for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) { const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection(); LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid(); unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner(); const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex); SetRange(layer, i, parentRange.first, parentRange.second); } } void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer) { m_LayersToCalibrate.push_back(layer); } void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer) { m_LayersNotToCalibrate.push_back(layer); } void DynamicQuantizationStrategy::FinishStrategy() { for (const IConnectableLayer* layer : m_LayersToCalibrate) { std::vector newDebugLayers = InsertDebugLayerAfter( m_Graph, *PolymorphicDowncast(const_cast(layer))); // record them so we can take them out again efficiently afterward m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers)); } } void DynamicQuantizationStrategy::RemoveDebugLayers() { for (DebugLayer* debugLayer : m_DebugLayers) { OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot(); proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0)); for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections()) { debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot); proceedingOutputSlot.Connect(*succeedingInputSlot); } m_Graph.EraseLayer(debugLayer); } m_DebugLayers.clear(); } void DynamicQuantizationStrategy::VisitNonCalibratedLayers() { RemoveDebugLayers(); for (const IConnectableLayer* layer : m_LayersNotToCalibrate) { ForwardParentParameters(layer); } } void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer, const BaseDescriptor& descriptor, const std::vector& constants, const char* name, const armnn::LayerBindingId id) { IgnoreUnused(name); IgnoreUnused(id); IgnoreUnused(descriptor); switch (layer->GetType()) { case armnn::LayerType::Activation : { const ActivationDescriptor& activationDescriptor = static_cast(descriptor); switch (activationDescriptor.m_Function) { // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu case ActivationFunction::Abs: case ActivationFunction::Linear: case ActivationFunction::ReLu: case ActivationFunction::SoftReLu: SetRange(layer, 0, 0.f, 15.f); break; case ActivationFunction::BoundedReLu: SetRange(layer, 0, 0.f, activationDescriptor.m_A); break; case ActivationFunction::TanH: SetRange(layer, 0, -1.f, 1.f); break; case ActivationFunction::LeakyReLu: SetRange(layer, 0, -5.f, 15.f); break; default: SetRange(layer, 0, -15.f, 15.f); break; } break; } case armnn::LayerType::Addition : { SetRange(layer, 0, -20.f, 20.f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::ArgMinMax : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::BatchNormalization : { SetRange(layer, 0, -15.0f, 15.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Normalization: { SetRange(layer, 0, -15.0f, 15.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Convolution2d: { SetRange(layer, 0, -15.0f, 15.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::DepthwiseConvolution2d: { SetRange(layer, 0, -15.0f, 15.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::FullyConnected : { SetRange(layer, 0, -15.0f, 15.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Permute : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::SpaceToBatchNd : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::Pooling2d : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::Softmax : { SetRange(layer, 0, 0.f, 1.f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Constant : { if (constants[0].GetDataType() != DataType::Float32) { throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); } // Work out the range based on the input constants unsigned int inputNumElements = constants[0].GetNumElements(); const float* inputData = reinterpret_cast(constants[0].GetMemoryArea()); float min = std::numeric_limits::max(); float max = std::numeric_limits::lowest(); for (unsigned int i = 0; i < inputNumElements; i++) { const float inputValue = inputData[i]; min = std::min(min, inputValue); max = std::max(max, inputValue); } SetRange(layer, 0, min, max); break; } case armnn::LayerType::Concat : { float min = std::numeric_limits::max(); float max = std::numeric_limits::lowest(); for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) { const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); LayerGuid layerId = outputSlot->GetOwningLayerGuid(); unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); min = std::min(min, range.first); max = std::max(max, range.second); } SetRange(layer, 0, min, max); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Reshape : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::Splitter : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::Resize : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::StridedSlice : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::BatchToSpaceNd : { AddToNonCalibratedLayers(layer); break; } case armnn::LayerType::Input : { SetRange(layer, 0, -0.0f, 0.0f); AddToCalibratedLayers(layer); break; } case armnn::LayerType::Output : { AddToNonCalibratedLayers(layer); m_OutputLayers.push_back(id); break; } default: {} } } const std::vector& DynamicQuantizationStrategy::GetOutputLayers() { return m_OutputLayers; } } //namespace armnn