Diffstat (limited to 'src/armnn')
55 files changed, 2357 insertions, 3464 deletions
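Summary: this change migrates the quantizer tooling from the deprecated per-layer ILayerVisitor callbacks (VisitAdditionLayer, VisitConstantLayer, ...) to the IStrategy interface. Each layer now forwards itself through Layer::ExecuteStrategy, and a consumer handles all layer kinds in a single ExecuteStrategy method by switching on LayerType. As a minimal sketch of the new pattern, assuming the ExecuteStrategy/FinishStrategy signatures and the ApplyStrategyToLayers helper shown in the hunks below (the LayerCounterStrategy class itself is hypothetical and not part of this commit):

#include <armnn/INetwork.hpp>
#include <armnn/IStrategy.hpp>
#include <armnn/Types.hpp>

#include <iostream>
#include <map>

// Hypothetical example strategy: counts how many layers of each type
// the graph contains. One switch on layer->GetType() replaces the
// per-layer Visit*() overrides of the old ILayerVisitor API.
class LayerCounterStrategy : public armnn::IStrategy
{
public:
    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                         const armnn::BaseDescriptor& /*descriptor*/,
                         const std::vector<armnn::ConstTensor>& /*constants*/,
                         const char* /*name*/,
                         const armnn::LayerBindingId /*id*/) override
    {
        ++m_Counts[layer->GetType()];
    }

    void FinishStrategy() override
    {
        // Called once after the whole graph has been walked.
        std::cout << "saw " << m_Counts.size() << " distinct layer types\n";
    }

private:
    std::map<armnn::LayerType, unsigned int> m_Counts;
};

A strategy is driven either through the new INetwork::ExecuteStrategy entry point (network->ExecuteStrategy(myStrategy), which only iterates the graph) or, inside src/armnn, through the ApplyStrategyToLayers helper added to NetworkQuantizerUtils.hpp, which additionally calls FinishStrategy() after the last layer; the per-layer-type ranges and calibration decisions that used to live in DynamicQuantizationVisitor's Visit*() methods now live in the switch inside DynamicQuantizationStrategy::ExecuteStrategy, as the first hunk below shows.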
diff --git a/src/armnn/DynamicQuantizationStrategy.cpp b/src/armnn/DynamicQuantizationStrategy.cpp new file mode 100644 index 0000000000..d354a0e441 --- /dev/null +++ b/src/armnn/DynamicQuantizationStrategy.cpp @@ -0,0 +1,276 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "DynamicQuantizationStrategy.hpp" +#include "NetworkUtils.hpp" + +#include <armnn/Descriptors.hpp> +#include <armnn/utility/IgnoreUnused.hpp> +#include <armnn/utility/PolymorphicDowncast.hpp> +#include <armnn/Types.hpp> + +#include <limits> + +namespace armnn +{ +DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph) + : m_RangeTracker(rangeTracker), + m_Graph(graph) +{} + +void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) +{ + m_RangeTracker.SetRange(layer, outputIdx, min, max); +} + +void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer) +{ + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) + { + const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection(); + LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid(); + unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner(); + const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex); + SetRange(layer, i, parentRange.first, parentRange.second); + } +} + +void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer) +{ + m_LayersToCalibrate.push_back(layer); +} + +void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer) +{ + m_LayersNotToCalibrate.push_back(layer); +} + +void DynamicQuantizationStrategy::FinishStrategy() +{ + for (const IConnectableLayer* layer : m_LayersToCalibrate) + { + std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter( + m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer))); + // record them so we can take them out again efficiently afterward + m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers)); + } +} + +void DynamicQuantizationStrategy::RemoveDebugLayers() +{ + for (DebugLayer* debugLayer : m_DebugLayers) + { + OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot(); + proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0)); + + for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections()) + { + debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot); + proceedingOutputSlot.Connect(*succeedingInputSlot); + } + m_Graph.EraseLayer(debugLayer); + } + m_DebugLayers.clear(); +} + +void DynamicQuantizationStrategy::VisitNonCalibratedLayers() { + RemoveDebugLayers(); + for (const IConnectableLayer* layer : m_LayersNotToCalibrate) + { + ForwardParentParameters(layer); + } +} + + +void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id) +{ + IgnoreUnused(name); + IgnoreUnused(id); + IgnoreUnused(descriptor); + + switch (layer->GetType()) + { + case armnn::LayerType::Activation : + { + const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor); + switch (activationDescriptor.m_Function) + { + // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu + case 
ActivationFunction::Abs: + case ActivationFunction::Linear: + case ActivationFunction::ReLu: + case ActivationFunction::SoftReLu: + SetRange(layer, 0, 0.f, 15.f); + break; + case ActivationFunction::BoundedReLu: + SetRange(layer, 0, 0.f, activationDescriptor.m_A); + break; + case ActivationFunction::TanH: + SetRange(layer, 0, -1.f, 1.f); + break; + case ActivationFunction::LeakyReLu: + SetRange(layer, 0, -5.f, 15.f); + break; + default: + SetRange(layer, 0, -15.f, 15.f); + break; + } + break; + } + case armnn::LayerType::Addition : + { + SetRange(layer, 0, -20.f, 20.f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::ArgMinMax : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::BatchNormalization : + { + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Normalization: + { + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Convolution2d: + { + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::DepthwiseConvolution2d: + { + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::FullyConnected : + { + SetRange(layer, 0, -15.0f, 15.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Permute : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::SpaceToBatchNd : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::Pooling2d : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::Softmax : + { + SetRange(layer, 0, 0.f, 1.f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Constant : + { + if (constants[0].GetDataType() != DataType::Float32) + { + throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); + } + + // Work out the range based on the input constants + unsigned int inputNumElements = constants[0].GetNumElements(); + const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea()); + + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + + for (unsigned int i = 0; i < inputNumElements; i++) + { + const float inputValue = inputData[i]; + + min = std::min(min, inputValue); + max = std::max(max, inputValue); + } + SetRange(layer, 0, min, max); + break; + } + case armnn::LayerType::Concat : + { + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) + { + const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); + LayerGuid layerId = outputSlot->GetOwningLayerGuid(); + unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); + RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); + min = std::min(min, range.first); + max = std::max(max, range.second); + } + SetRange(layer, 0, min, max); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Reshape : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::Splitter : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::Resize : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::StridedSlice : + { + AddToNonCalibratedLayers(layer); + break; + } + case armnn::LayerType::BatchToSpaceNd : + { + AddToNonCalibratedLayers(layer); + break; + } + case 
armnn::LayerType::Input : + { + SetRange(layer, 0, -0.0f, 0.0f); + AddToCalibratedLayers(layer); + break; + } + case armnn::LayerType::Output : + { + AddToNonCalibratedLayers(layer); + m_OutputLayers.push_back(id); + break; + } + default: + {} + } +} + +const std::vector<LayerBindingId>& DynamicQuantizationStrategy::GetOutputLayers() +{ + return m_OutputLayers; +} + +} //namespace armnn diff --git a/src/armnn/DynamicQuantizationStrategy.hpp b/src/armnn/DynamicQuantizationStrategy.hpp new file mode 100644 index 0000000000..aa77a4b563 --- /dev/null +++ b/src/armnn/DynamicQuantizationStrategy.hpp @@ -0,0 +1,59 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/LayerVisitorBase.hpp" +#include "RangeTracker.hpp" +#include "layers/DebugLayer.hpp" + +#include <armnn/INetwork.hpp> +#include <armnnQuantizer/INetworkQuantizer.hpp> + +namespace armnn +{ + +/// Visitor class implementation to gather the TensorInfo for LayerBindingID for creation of ConstTensor for Refine. +class DynamicQuantizationStrategy : public armnn::IStrategy +{ +public: + + DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph); + ~DynamicQuantizationStrategy() = default; + + virtual void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id = 0) override; + + const std::vector<armnn::LayerBindingId>& GetOutputLayers(); + void VisitNonCalibratedLayers(); + void FinishStrategy() override; + + +private: + /// Set the range for an output slot on a layer + void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); + + void ForwardParentParameters(const IConnectableLayer* layer); + + /// Mapping from a layer Guid to an array of ranges for outputs + RangeTracker& m_RangeTracker; + + Graph& m_Graph; + + std::vector<const IConnectableLayer*> m_LayersToCalibrate; + std::vector<const IConnectableLayer*> m_LayersNotToCalibrate; + std::vector<DebugLayer*> m_DebugLayers; + + std::vector<armnn::LayerBindingId> m_OutputLayers; + void AddToCalibratedLayers(const IConnectableLayer* layer); + void AddToNonCalibratedLayers(const IConnectableLayer* layer); + void RemoveDebugLayers(); + + +}; +} //namespace armnn diff --git a/src/armnn/DynamicQuantizationVisitor.cpp b/src/armnn/DynamicQuantizationVisitor.cpp deleted file mode 100644 index 02e7699eed..0000000000 --- a/src/armnn/DynamicQuantizationVisitor.cpp +++ /dev/null @@ -1,364 +0,0 @@ -// -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "DynamicQuantizationVisitor.hpp" -#include "NetworkUtils.hpp" - -#include <armnn/Descriptors.hpp> -#include <armnn/utility/IgnoreUnused.hpp> -#include <armnn/utility/PolymorphicDowncast.hpp> -#include <armnn/Types.hpp> - -#include <limits> - -namespace armnn -{ - -DynamicQuantizationVisitor::DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph) - : m_RangeTracker(rangeTracker), - m_Graph(graph) -{} - -void DynamicQuantizationVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) -{ - m_RangeTracker.SetRange(layer, outputIdx, min, max); -} - -void DynamicQuantizationVisitor::ForwardParentParameters(const IConnectableLayer* layer) -{ - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid(); - unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner(); - const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex); - SetRange(layer, i, parentRange.first, parentRange.second); - } -} - -void DynamicQuantizationVisitor::AddToCalibratedLayers(const IConnectableLayer* layer) -{ - m_LayersToCalibrate.push_back(layer); -} - -void DynamicQuantizationVisitor::AddToNonCalibratedLayers(const IConnectableLayer* layer) -{ - m_LayersNotToCalibrate.push_back(layer); -} - -void DynamicQuantizationVisitor::FinishVisit() -{ - for (const IConnectableLayer* layer : m_LayersToCalibrate) - { - std::vector<DebugLayer*> newDebugLayers = InsertDebugLayerAfter( - m_Graph, *PolymorphicDowncast<Layer*>(const_cast<IConnectableLayer*>(layer))); - // record them so we can take them out again efficiently afterward - m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers)); - } -} - -void DynamicQuantizationVisitor::RemoveDebugLayers() -{ - for (DebugLayer* debugLayer : m_DebugLayers) - { - OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot(); - proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0)); - - for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections()) - { - debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot); - proceedingOutputSlot.Connect(*succeedingInputSlot); - } - m_Graph.EraseLayer(debugLayer); - } - m_DebugLayers.clear(); -} - -void DynamicQuantizationVisitor::VisitNonCalibratedLayers() { - RemoveDebugLayers(); - for (const IConnectableLayer* layer : m_LayersNotToCalibrate) - { - ForwardParentParameters(layer); - } -} - -void DynamicQuantizationVisitor::VisitAdditionLayer(const IConnectableLayer* layer, - const char* name) -{ - IgnoreUnused(name); - SetRange(layer, 0, -20.f, 20.f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitAbsLayer(const IConnectableLayer* layer, - const char* name) -{ - IgnoreUnused(name); - SetRange(layer, 0, -20.f, 20.f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& desc, - const char* name) -{ - IgnoreUnused(name); - IgnoreUnused(desc); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name) -{ - 
IgnoreUnused(desc); - IgnoreUnused(mean); - IgnoreUnused(variance); - IgnoreUnused(beta); - IgnoreUnused(gamma); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitNormalizationLayer(const IConnectableLayer* layer, - const NormalizationDescriptor& desc, - const char* name) -{ - IgnoreUnused(desc); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - IgnoreUnused(convolution2dDescriptor); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - IgnoreUnused(desc); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name) -{ - IgnoreUnused(name, activationDescriptor); - switch (activationDescriptor.m_Function) - { - // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu - case ActivationFunction::Abs: - case ActivationFunction::Linear: - case ActivationFunction::ReLu: - case ActivationFunction::SoftReLu: - SetRange(layer, 0, 0.f, 15.f); - break; - case ActivationFunction::BoundedReLu: - SetRange(layer, 0, 0.f, activationDescriptor.m_A); - break; - case ActivationFunction::TanH: - SetRange(layer, 0, -1.f, 1.f); - break; - case ActivationFunction::LeakyReLu: - SetRange(layer, 0, -5.f, 15.f); - break; - default: - SetRange(layer, 0, -15.f, 15.f); - break; - } - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name) -{ - IgnoreUnused(desc); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name) -{ - IgnoreUnused(permuteDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name) -{ - IgnoreUnused(spaceToBatchNdDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name) -{ - IgnoreUnused(pooling2dDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name) -{ - IgnoreUnused(softmaxDescriptor); - IgnoreUnused(name); - 
SetRange(layer, 0, 0.f, 1.f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name) -{ - IgnoreUnused(name); - - if (input.GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); - } - - // Work out the range based on the input constants - unsigned int inputNumElements = input.GetNumElements(); - const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea()); - - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); - - for (unsigned int i = 0; i < inputNumElements; i++) - { - const float inputValue = inputData[i]; - - min = std::min(min, inputValue); - max = std::max(max, inputValue); - } - SetRange(layer, 0, min, max); -} - -void DynamicQuantizationVisitor::VisitConcatLayer(const IConnectableLayer* layer, - const ConcatDescriptor& originsDescriptor, - const char* name) -{ - IgnoreUnused(name); - IgnoreUnused(originsDescriptor); - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid layerId = outputSlot->GetOwningLayerGuid(); - unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); - RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); - min = std::min(min, range.first); - max = std::max(max, range.second); - } - SetRange(layer, 0, min, max); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name) -{ - IgnoreUnused(reshapeDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name) -{ - IgnoreUnused(splitterDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeDesc, - const char* name) -{ - IgnoreUnused(resizeDesc); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name) -{ - IgnoreUnused(stridedSliceDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name) -{ - IgnoreUnused(batchToSpaceNdDescriptor); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) -{ - IgnoreUnused(id); - IgnoreUnused(name); - SetRange(layer, 0, -0.0f, 0.0f); - AddToCalibratedLayers(layer); -} - -void DynamicQuantizationVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) -{ - IgnoreUnused(id); - IgnoreUnused(name); - AddToNonCalibratedLayers(layer); - m_OutputLayers.push_back(id); -} - -const std::vector<LayerBindingId>& DynamicQuantizationVisitor::GetOutputLayers() -{ - return 
m_OutputLayers; -} - -} //namespace armnn diff --git a/src/armnn/DynamicQuantizationVisitor.hpp b/src/armnn/DynamicQuantizationVisitor.hpp deleted file mode 100644 index 358e47187e..0000000000 --- a/src/armnn/DynamicQuantizationVisitor.hpp +++ /dev/null @@ -1,149 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/LayerVisitorBase.hpp" -#include "RangeTracker.hpp" -#include "layers/DebugLayer.hpp" - -#include <armnn/INetwork.hpp> -#include <armnnQuantizer/INetworkQuantizer.hpp> - -namespace armnn -{ - -/// Visitor class to establish min/max ranges based on the type of the layer -class DynamicQuantizationVisitor : public LayerVisitorBase<VisitorThrowingPolicy> -{ -public: - DynamicQuantizationVisitor(RangeTracker& rangeTracker, Graph& graph); - ~DynamicQuantizationVisitor() = default; - - /// Functions to set the Range on a per-layer-type basis - void VisitAbsLayer(const IConnectableLayer* layer, - const char* name = nullptr) override; - - void VisitAdditionLayer(const IConnectableLayer* layer, - const char* name = nullptr) override; - - void VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& desc, - const char* name = nullptr) override; - - void VisitNormalizationLayer(const IConnectableLayer* layer, - const NormalizationDescriptor& desc, - const char* name = nullptr) override ; - - void VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name = nullptr) override; - - void VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name = nullptr) override; - - void VisitFullyConnectedLayer(const IConnectableLayer *layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name) override; - - void VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name) override; - - void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name = nullptr) override; - - void VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name) override; - - void VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name = nullptr) override; - - void VisitConcatLayer(const IConnectableLayer* layer, - const ConcatDescriptor& originsDescriptor, - const char* name = nullptr) override; - - void VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name = nullptr) override; - - void VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name = nullptr) override; - - void VisitSplitterLayer(const 
IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name = nullptr) override; - - void VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeDesc, - const char* name = nullptr) override; - - void VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name = nullptr) override; - - void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name = nullptr) override; - - void VisitInputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override; - - void VisitOutputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override; - - void FinishVisit() override; - void VisitNonCalibratedLayers(); - - const std::vector<armnn::LayerBindingId>& GetOutputLayers(); - -private: - /// Set the range for an output slot on a layer - void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); - - void ForwardParentParameters(const IConnectableLayer* layer); - - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_RangeTracker; - - Graph& m_Graph; - - std::vector<const IConnectableLayer*> m_LayersToCalibrate; - std::vector<const IConnectableLayer*> m_LayersNotToCalibrate; - std::vector<DebugLayer*> m_DebugLayers; - - std::vector<armnn::LayerBindingId> m_OutputLayers; - - void AddToCalibratedLayers(const IConnectableLayer* layer); - void AddToNonCalibratedLayers(const IConnectableLayer* layer); - void RemoveDebugLayers(); -}; - -} //namespace armnn diff --git a/src/armnn/InternalTypes.hpp b/src/armnn/InternalTypes.hpp index 6e6559137c..9850520116 100644 --- a/src/armnn/InternalTypes.hpp +++ b/src/armnn/InternalTypes.hpp @@ -8,93 +8,9 @@ #include <array> - -/// This list uses X macro technique. 
-/// See https://en.wikipedia.org/wiki/X_Macro for more info -#define LIST_OF_LAYER_TYPE \ - X(Activation) \ - X(Addition) \ - X(ArgMinMax) \ - X(BatchNormalization) \ - X(BatchToSpaceNd) \ - X(Comparison) \ - X(Concat) \ - X(Constant) \ - X(ConvertBf16ToFp32) \ - X(ConvertFp16ToFp32) \ - X(ConvertFp32ToBf16) \ - X(ConvertFp32ToFp16) \ - X(Convolution2d) \ - X(Debug) \ - X(DepthToSpace) \ - X(DepthwiseConvolution2d) \ - X(Dequantize) \ - X(DetectionPostProcess) \ - X(Division) \ - X(ElementwiseUnary) \ - X(FakeQuantization) \ - X(Fill) \ - X(Floor) \ - X(FullyConnected) \ - X(Gather) \ - X(Input) \ - X(InstanceNormalization) \ - X(L2Normalization) \ - X(LogicalBinary) \ - X(LogSoftmax) \ - X(Lstm) \ - X(QLstm) \ - X(Map) \ - X(Maximum) \ - X(Mean) \ - X(MemCopy) \ - X(MemImport) \ - X(Merge) \ - X(Minimum) \ - X(Multiplication) \ - X(Normalization) \ - X(Output) \ - X(Pad) \ - X(Permute) \ - X(Pooling2d) \ - X(PreCompiled) \ - X(Prelu) \ - X(Quantize) \ - X(QuantizedLstm) \ - X(Reshape) \ - X(Rank) \ - X(Reduce) \ - X(Resize) \ - X(Slice) \ - X(Softmax) \ - X(SpaceToBatchNd) \ - X(SpaceToDepth) \ - X(Splitter) \ - X(Stack) \ - X(StandIn) \ - X(StridedSlice) \ - X(Subtraction) \ - X(Switch) \ - X(Transpose) \ - X(TransposeConvolution2d) \ - X(Unmap) - -/// When adding a new layer, adapt also the LastLayer enum value in the -/// enum class LayerType below namespace armnn { -enum class LayerType -{ -#define X(name) name, - LIST_OF_LAYER_TYPE -#undef X - FirstLayer = Activation, - LastLayer = Unmap -}; - -const char* GetLayerTypeAsCString(LayerType type); - using Coordinates = std::array<unsigned int, MaxNumOfTensorDimensions>; using Dimensions = std::array<unsigned int, MaxNumOfTensorDimensions>; diff --git a/src/armnn/Layer.cpp b/src/armnn/Layer.cpp index 9a526a0943..c9733e822b 100644 --- a/src/armnn/Layer.cpp +++ b/src/armnn/Layer.cpp @@ -473,4 +473,10 @@ void Layer::SerializeLayerParameters(ParameterStringifyFunction& fn) const } } +// default implementation of ExecuteStrategy +void Layer::ExecuteStrategy(IStrategy& strategy) const +{ + strategy.ExecuteStrategy(this, BaseDescriptor(), {}, GetName()); +} + } // namespace armnn diff --git a/src/armnn/Layer.hpp b/src/armnn/Layer.hpp index ef0f8c3297..2f5cacc3ce 100644 --- a/src/armnn/Layer.hpp +++ b/src/armnn/Layer.hpp @@ -214,6 +214,9 @@ public: Layer(unsigned int numInputSlots, unsigned int numOutputSlots, LayerType type, const char* name); Layer(unsigned int numInputSlots, unsigned int numOutputSlots, LayerType type, DataLayout layout, const char* name); + void ExecuteStrategy(IStrategy& strategy) const override; + + const std::string& GetNameStr() const { return m_LayerName; @@ -259,7 +262,7 @@ public: void ResetPriority() const; LayerPriority GetPriority() const; - LayerType GetType() const { return m_Type; } + LayerType GetType() const override { return m_Type; } DataType GetDataType() const; @@ -440,6 +443,11 @@ public: LayerBindingId GetBindingId() const { return m_Id; }; + void ExecuteStrategy(IStrategy& strategy) const override + { + strategy.ExecuteStrategy(this, BaseDescriptor(), {}, GetName(), GetBindingId()); + } + protected: ~BindableLayer() = default; diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index f8b0675f0d..bf7a056f6e 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -2021,6 +2021,14 @@ void Network::Accept(ILayerVisitor& visitor) const }; } +void Network::ExecuteStrategy(IStrategy& strategy) const +{ + for (auto layer : GetGraph()) + { + layer->ExecuteStrategy(strategy); + }; +} + 
OptimizedNetwork::OptimizedNetwork(std::unique_ptr<Graph> graph) : m_Graph(std::move(graph)), m_Guid(profiling::ProfilingService::GetNextGuid()) { diff --git a/src/armnn/Network.hpp b/src/armnn/Network.hpp index 1205bd847e..cffade5a21 100644 --- a/src/armnn/Network.hpp +++ b/src/armnn/Network.hpp @@ -258,6 +258,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + private: IConnectableLayer* AddFullyConnectedLayerImpl(const FullyConnectedDescriptor& fullyConnectedDescriptor, const ConstTensor& weights, diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp index e6becee96f..eed3f41bdc 100644 --- a/src/armnn/NetworkQuantizer.cpp +++ b/src/armnn/NetworkQuantizer.cpp @@ -8,9 +8,9 @@ #include "Graph.hpp" #include "Layer.hpp" #include "Network.hpp" -#include "DynamicQuantizationVisitor.hpp" -#include "StaticRangeVisitor.hpp" -#include "QuantizerVisitor.hpp" +#include "DynamicQuantizationStrategy.hpp" +#include "StaticRangeStrategy.hpp" +#include "QuantizerStrategy.hpp" #include "OverrideInputRangeVisitor.hpp" #include <TensorIOUtils.hpp> @@ -60,9 +60,9 @@ void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, flo void NetworkQuantizer::Refine(const InputTensors& inputTensors) { - // The first time Refine is called the m_Runtime and the DynamicQuantizationVisitor + // The first time Refine is called the m_Runtime and the DynamicQuantizationStrategy // will not have been created. Need to get the environment set up, Runtime loaded, - // DynamicQuantizationVisitor created and run over the network to initialise itself + // DynamicQuantizationStrategy created and run over the network to initialise itself // and the RangeTracker the Debug callback registered and an initial inference // done to set up the first min/max values if (!m_Runtime) @@ -71,15 +71,15 @@ void NetworkQuantizer::Refine(const InputTensors& inputTensors) m_Ranges.SetDynamicMode(true); const Graph& cGraph = PolymorphicDowncast<const Network*>(m_InputNetwork)->GetGraph().TopologicalSort(); - // need to insert Debug layers in the DynamicQuantizationVisitor + // need to insert Debug layers in the DynamicQuantizationStrategy Graph& graph = const_cast<Graph&>(cGraph); // Initialize RangeTracker to the default values for each layer. // The default values are overwritten by the min/max that is // recorded during the first dataset min/max calibration. This // initialisation is only required for the first call of Refine(). 
- m_DynamicQuantizationVisitor = DynamicQuantizationVisitor(m_Ranges, graph); - VisitLayers(cGraph, m_DynamicQuantizationVisitor.value()); + m_DynamicQuantizationStrategy = DynamicQuantizationStrategy(m_Ranges, graph); + ApplyStrategyToLayers(cGraph, m_DynamicQuantizationStrategy.value()); IRuntime::CreationOptions options; m_Runtime = IRuntime::Create(options); @@ -119,7 +119,7 @@ void NetworkQuantizer::Refine(const InputTensors& inputTensors) // Create output tensor for EnqueueWorkload std::vector<armnn::BindingPointInfo> outputBindings; - auto outputLayers = m_DynamicQuantizationVisitor.value().GetOutputLayers(); + auto outputLayers = m_DynamicQuantizationStrategy.value().GetOutputLayers(); std::vector<TContainer> outputVectors; for (auto outputLayerBindingId : outputLayers) { @@ -144,16 +144,16 @@ INetworkPtr NetworkQuantizer::ExportNetwork() if (!m_Runtime) { m_Ranges.SetDynamicMode(false); - StaticRangeVisitor rangeVisitor(m_Ranges); - VisitLayers(graph, rangeVisitor); + StaticRangeStrategy rangeStrategy(m_Ranges); + ApplyStrategyToLayers(graph, rangeStrategy); } else { // Set min/max range of non-calibrated layers to parent layer's range - m_DynamicQuantizationVisitor.value().VisitNonCalibratedLayers(); + m_DynamicQuantizationStrategy.value().VisitNonCalibratedLayers(); // now tear down the runtime and the dynamic visitor. m_Runtime.reset(nullptr); - m_DynamicQuantizationVisitor = EmptyOptional(); + m_DynamicQuantizationStrategy = EmptyOptional(); m_RefineCount = 0; } @@ -177,8 +177,8 @@ INetworkPtr NetworkQuantizer::ExportNetwork() throw InvalidArgumentException("Unsupported quantization target"); } - QuantizerVisitor quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType); - VisitLayers(graph, quantizerVisitor); + QuantizerStrategy quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType); + ApplyStrategyToLayers(graph, quantizerVisitor); // clear the ranges m_Ranges.Reset(); diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp index d384bdc545..a07ac8827e 100644 --- a/src/armnn/NetworkQuantizer.hpp +++ b/src/armnn/NetworkQuantizer.hpp @@ -11,7 +11,7 @@ #include <armnn/Types.hpp> #include <armnn/Optional.hpp> -#include "DynamicQuantizationVisitor.hpp" +#include "DynamicQuantizationStrategy.hpp" #include "RangeTracker.hpp" namespace armnn @@ -44,7 +44,7 @@ private: // the runtime between invocations of the Refine method. 
IRuntimePtr m_Runtime; - Optional<DynamicQuantizationVisitor> m_DynamicQuantizationVisitor; + Optional<DynamicQuantizationStrategy> m_DynamicQuantizationStrategy; // counts the number of times refine is called unsigned int m_RefineCount; diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp index dd274f9e35..5497e1b898 100644 --- a/src/armnn/NetworkQuantizerUtils.hpp +++ b/src/armnn/NetworkQuantizerUtils.hpp @@ -10,6 +10,7 @@ #include <armnn/Tensor.hpp> #include <armnn/TypesUtils.hpp> #include <armnn/ILayerVisitor.hpp> +#include <armnn/IStrategy.hpp> #include <armnn/utility/Assert.hpp> #include <utility> @@ -56,4 +57,14 @@ void VisitLayers(const LayerContainer& layerContainer, ILayerVisitor& visitor) visitor.FinishVisit(); } +template <typename LayerContainer> +void ApplyStrategyToLayers(const LayerContainer& layerContainer, IStrategy& strategy) +{ + for (auto layer : layerContainer) + { + layer->ExecuteStrategy(strategy); + } + strategy.FinishStrategy(); +} + } // namespace armnn diff --git a/src/armnn/OverrideInputRangeVisitor.hpp b/src/armnn/OverrideInputRangeVisitor.hpp index 511c851bef..196a3aab1d 100644 --- a/src/armnn/OverrideInputRangeVisitor.hpp +++ b/src/armnn/OverrideInputRangeVisitor.hpp @@ -13,6 +13,57 @@ namespace armnn { +class OverrideInputRangeStrategy : public IStrategy +{ +private: + using MinMaxRange = RangeTracker::MinMaxRange; +public : + OverrideInputRangeStrategy(RangeTracker& ranges, + LayerBindingId layerId, + const MinMaxRange& minMaxRange) + : m_Ranges(ranges) + , m_LayerId(layerId) + , m_MinMaxRange(minMaxRange){} + + ~OverrideInputRangeStrategy() = default; + + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id) override + { + IgnoreUnused(name, constants, id, descriptor); + + switch (layer->GetType()) + { + case armnn::LayerType::Input : + { + if (m_LayerId == id) + { + m_Ranges.SetRange(layer, 0, m_MinMaxRange.first, m_MinMaxRange.second); + } + break; + } + default: + { + std::cout << "dont know this one" << std::endl; + } + } + } + +private: + /// Mapping from a layer Guid to an array of ranges for outputs + RangeTracker& m_Ranges; + + /// The id of the input layer of which to override the input range + LayerBindingId m_LayerId; + + /// The new input range to be applied to the input layer + MinMaxRange m_MinMaxRange; +}; + + /// Visitor object for overriding the input range of the quantized input layers in a network class OverrideInputRangeVisitor : public LayerVisitorBase<VisitorNoThrowPolicy> diff --git a/src/armnn/QuantizerStrategy.cpp b/src/armnn/QuantizerStrategy.cpp new file mode 100644 index 0000000000..df20749072 --- /dev/null +++ b/src/armnn/QuantizerStrategy.cpp @@ -0,0 +1,519 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "QuantizerStrategy.hpp" +#include "armnn/utility/PolymorphicDowncast.hpp" + +namespace armnn +{ + +QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker, + const IQuantizationScheme* quantizationScheme, + bool preserveType) + : m_Ranges(rangeTracker) + , m_QuantizedNetwork(INetwork::Create()) + , m_QuantizationScheme(quantizationScheme) + , m_PreserveType(preserveType) +{ +} + +void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer, + IConnectableLayer* quantizedLayer) +{ + ARMNN_ASSERT(srcLayer); + for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++) + { + const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i); + const InputSlot* inputSlot = static_cast<const InputSlot*>(&srcInputSlot); + ARMNN_ASSERT(inputSlot); + const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); + + ARMNN_ASSERT(outputSlot); + unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); + Layer& layerToFind = outputSlot->GetOwningLayer(); + + auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); + if (found == m_OriginalToQuantizedGuidMap.end()) + { + // Error in graph traversal order + ARMNN_ASSERT_MSG(false, "Error in graph traversal"); + return; + } + + // Connect the slots in the quantized model + IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second]; + IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i); + IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx); + newOutputSlot.Connect(newInputSlot); + TensorInfo info(outputSlot->GetTensorInfo()); + + // Only try to set quantization params on tensors that can be quantized + if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean && + inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 && + inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64) + { + // Fetch the min/max ranges that were computed earlier + auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); + OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); + info.SetDataType(m_QuantizationScheme->GetDataType()); + info.SetQuantizationOffset(qParams.second); + info.SetQuantizationScale(qParams.first); + } + newOutputSlot.SetTensorInfo(info); + } +} + +ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer, + const ConstTensor& weights, + const Optional<ConstTensor>& biases, + std::vector<int32_t>& backing) +{ + ARMNN_ASSERT(srcLayer); + const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0); + auto inputSlot = static_cast<const InputSlot*>(&srcInputSlot); + ARMNN_ASSERT(inputSlot); + const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); + + ARMNN_ASSERT(outputSlot); + unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); + Layer& layerToFind = outputSlot->GetOwningLayer(); + + auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); + if (found == m_OriginalToQuantizedGuidMap.end()) + { + // Error in graph traversal order + ARMNN_ASSERT_MSG(false, "Error in graph traversal"); + return biases.value(); + } + + // Fetch the min/max ranges that were computed earlier + auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); + OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); + + // Get the quantization scale based on input and weight scale + float scale = 
qParams.first * weights.GetInfo().GetQuantizationScale(); + + // Set up quantized bias tensor info and allocate space + TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0); + backing.resize(biases.value().GetInfo().GetNumElements()); + + // Convert values to int32 + for (size_t i = 0; i < backing.size(); ++i) + { + float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i]; + backing[i] = armnn::numeric_cast<int32_t>(fp32Value * ( 1 / scale )); + } + + return ConstTensor(qInfo, backing); +} + +void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer) +{ + m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid())); + m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer)); +} + +void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer *layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor> &constants, + const char *name, + const armnn::LayerBindingId id) +{ + IgnoreUnused(id); + + IConnectableLayer* newLayer; + + switch (layer->GetType()) + { + case armnn::LayerType::Addition : + { + newLayer = m_QuantizedNetwork->AddAdditionLayer(name); + break; + } + case armnn::LayerType::Activation : + { + const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name); + break; + } + case armnn::LayerType::ArgMinMax : + { + ArgMinMaxDescriptor argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name); + break; + } + case armnn::LayerType::BatchNormalization : + { + + BatchNormalizationDescriptor batchNormalizationDescriptor = + static_cast<const BatchNormalizationDescriptor&>(descriptor); + std::vector<uint8_t> meanBacking; + ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking); + + std::vector<uint8_t> varianceBacking; + ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking); + + std::vector<uint8_t> betaBacking; + ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking); + + std::vector<uint8_t> gammaBacking; + ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking); + + newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor, + qMean, + qVariance, + qBeta, + qGamma, + name); + break; + } + case armnn::LayerType::BatchToSpaceNd : + { + BatchToSpaceNdDescriptor batchToSpaceNdDescriptor = + static_cast<const BatchToSpaceNdDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name); + break; + } + case armnn::LayerType::Comparison : + { + ComparisonDescriptor comparisonDescriptor =static_cast<const ComparisonDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name); + break; + } + case armnn::LayerType::Concat : + { + OriginsDescriptor originsDescriptor = static_cast<const OriginsDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name); + break; + } + case armnn::LayerType::Constant : + { + std::vector<uint8_t> inputBacking; + ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking); + + newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name); + break; + } + case armnn::LayerType::Convolution2d : + { + const 
armnn::Optional<ConstTensor> biases = constants.size() == 1 ? + armnn::Optional<ConstTensor>{} : + armnn::Optional<ConstTensor>(constants[1]); + + std::vector<uint8_t> weightsBacking; + ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); + Optional<ConstTensor> optionalQBiases; + std::vector<int32_t> biasesBacking; + + if (biases.has_value()) + { + ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); + optionalQBiases = Optional<ConstTensor>(qBiases); + } + Convolution2dDescriptor convolution2dDescriptor = static_cast<const Convolution2dDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor, + qWeights, + optionalQBiases, + name); + break; + } + case armnn::LayerType::DepthToSpace : + { + DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast<const DepthToSpaceDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name); + break; + } + case armnn::LayerType::DepthwiseConvolution2d : + { + DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor = + static_cast<const DepthwiseConvolution2dDescriptor&>(descriptor); + + const armnn::Optional<ConstTensor> biases = constants.size() == 1 ? + armnn::Optional<ConstTensor>{} : + armnn::Optional<ConstTensor>(constants[1]); + + std::vector<uint8_t> weightsBacking; + ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); + Optional<ConstTensor> optionalQBiases; + std::vector<int32_t> biasesBacking; + + if (biases.has_value()) + { + ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); + optionalQBiases = Optional<ConstTensor>(qBiases); + } + + newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer( + depthwiseConvolution2dDescriptor, + qWeights, + optionalQBiases, + name); + break; + } + case armnn::LayerType::ElementwiseUnary : + { + ElementwiseUnaryDescriptor elementwiseUnaryDescriptor = + static_cast<const ElementwiseUnaryDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name); + break; + } + case armnn::LayerType::Fill : + { + FillDescriptor fillDescriptor = static_cast<const FillDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name); + break; + } + case armnn::LayerType::FullyConnected : + { + FullyConnectedDescriptor fullyConnectedDescriptor = + static_cast<const FullyConnectedDescriptor&>(descriptor); + + const armnn::Optional<ConstTensor> biases = constants.size() == 1 ? 
+ armnn::Optional<ConstTensor>{} : + armnn::Optional<ConstTensor>(constants[1]); + + std::vector<uint8_t> weightsBacking; + ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); + Optional<ConstTensor> optionalQBiases; + std::vector<int32_t> biasesBacking; + + if (biases.has_value()) + { + ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); + optionalQBiases = Optional<ConstTensor>(qBiases); + } + + newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor, + qWeights, + optionalQBiases, + name); + break; + } + case armnn::LayerType::Input : + { + const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType(); + IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name); + + if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) + { + IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer(); + inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0)); + inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo()); + RecordLayer(layer, quantizeLayer); + return; + } + else + { + RecordLayer(layer, inputLayer); + return; + } + } + case armnn::LayerType::InstanceNormalization : + { + InstanceNormalizationDescriptor instanceNormalizationDescriptor = + static_cast<const InstanceNormalizationDescriptor&>(descriptor); + + newLayer = + m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name); + break; + } + case armnn::LayerType::LogSoftmax : + { + LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast<const LogSoftmaxDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name); + break; + } + case armnn::LayerType::Mean : + { + MeanDescriptor meanDescriptor = static_cast<const MeanDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name); + break; + } + case armnn::LayerType::Multiplication : + { + newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name); + break; + } + case armnn::LayerType::Normalization : + { + NormalizationDescriptor normalizationDescriptor = static_cast<const NormalizationDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name); + break; + } + case armnn::LayerType::Output : + { + const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + const DataType& dataType = info.GetDataType(); + newLayer = m_QuantizedNetwork->AddOutputLayer(id, name); + + if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) + { + IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer(); + RecordLayer(layer, dequantizeLayer); + SetQuantizedInputConnections(layer, dequantizeLayer); + dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0)); + dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info); + return; + } + else + { + break; + } + } + case armnn::LayerType::Pad : + { + PadDescriptor padDescriptor = static_cast<const PadDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name); + break; + } + case armnn::LayerType::Permute : + { + PermuteDescriptor permuteDescriptor = static_cast<const PermuteDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name); + break; + } + case armnn::LayerType::Pooling2d : + { + Pooling2dDescriptor pooling2dDescriptor = static_cast<const 
Pooling2dDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name); + break; + } + case armnn::LayerType::Prelu : + { + newLayer = m_QuantizedNetwork->AddPreluLayer(name); + break; + } + case armnn::LayerType::Reshape : + { + ReshapeDescriptor reshapeDescriptor = static_cast<const ReshapeDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name); + break; + } + case armnn::LayerType::Resize : + { + + ResizeBilinearDescriptor resizeBilinearDescriptor = + static_cast<const ResizeBilinearDescriptor&>(descriptor); + + ResizeDescriptor resizeDescriptor; + resizeDescriptor.m_Method = ResizeMethod::Bilinear; + resizeDescriptor.m_TargetWidth = resizeBilinearDescriptor.m_TargetWidth; + resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight; + resizeDescriptor.m_DataLayout = resizeBilinearDescriptor.m_DataLayout; + + newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name); + break; + } + case armnn::LayerType::Slice : + { + SliceDescriptor sliceDescriptor = static_cast<const SliceDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name); + break; + } + case armnn::LayerType::Softmax : + { + SoftmaxDescriptor softmaxDescriptor = static_cast<const SoftmaxDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name); + break; + } + case armnn::LayerType::SpaceToBatchNd : + { + SpaceToBatchNdDescriptor spaceToBatchNdDescriptor = + static_cast<const SpaceToBatchNdDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name); + break; + } + case armnn::LayerType::SpaceToDepth : + { + SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast<const SpaceToDepthDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name); + break; + } + case armnn::LayerType::Splitter : + { + SplitterDescriptor splitterDescriptor = static_cast<const SplitterDescriptor&>(descriptor); + newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name); + break; + } + case armnn::LayerType::Stack : + { + StackDescriptor stackDescriptor = static_cast<const StackDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name); + break; + } + case armnn::LayerType::StridedSlice : + { + StridedSliceDescriptor stridedSliceDescriptor = static_cast<const StridedSliceDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name); + break; + } + case armnn::LayerType::Subtraction : + { + newLayer = m_QuantizedNetwork->AddSubtractionLayer( name); + break; + } + case armnn::LayerType::TransposeConvolution2d : + { + + const armnn::Optional<ConstTensor> biases = constants.size() == 1 ? 
+ armnn::Optional<ConstTensor>{} : + armnn::Optional<ConstTensor>(constants[1]); + // quantize weights + std::vector<uint8_t> weightsBacking; + ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); + + // quantize biases + std::vector<int32_t> biasesBacking; + Optional<ConstTensor> optionalQBiases; + if (biases.has_value()) + { + ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); + optionalQBiases = Optional<ConstTensor>(qBiases); + } + + TransposeConvolution2dDescriptor transposeConvolution2dDescriptor = + static_cast<const TransposeConvolution2dDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor, + qWeights, + optionalQBiases, + name); + break; + } + case armnn::LayerType::Transpose : + { + TransposeDescriptor transposeDescriptor = static_cast<const TransposeDescriptor&>(descriptor); + + newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name); + break; + } + default: + { + throw UnimplementedException("Unimplemented layer encountered"); + } + } + RecordLayer(layer, newLayer); + SetQuantizedInputConnections(layer, newLayer); +} + +} + diff --git a/src/armnn/QuantizerStrategy.hpp b/src/armnn/QuantizerStrategy.hpp new file mode 100644 index 0000000000..f782959020 --- /dev/null +++ b/src/armnn/QuantizerStrategy.hpp @@ -0,0 +1,63 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "Network.hpp" +#include "NetworkQuantizerUtils.hpp" +#include "StaticRangeStrategy.hpp" + +#include <armnn/utility/NumericCast.hpp> +#include <armnn/utility/PolymorphicDowncast.hpp> + +namespace armnn +{ +class QuantizerStrategy : public IStrategy +{ +public : + QuantizerStrategy(const RangeTracker& rangeTracker, + const IQuantizationScheme* quantizationScheme, + bool preserveType); + + ~QuantizerStrategy() = default; + + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id) override; + + /// Extract the quantized network + INetworkPtr RetrieveFinalNetwork() { return std::move(m_QuantizedNetwork); } + +private: + /// Connects the layer to preceeding layers and sets the quantization parameters based on recorded ranges + void SetQuantizedInputConnections(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer); + + /// Record the guids so we can easily find the layers later + void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer); + + /// Sets the bias quantization scale based on input and weight scales + ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer, + const ConstTensor& weights, + const Optional<ConstTensor>& biases, + std::vector<int32_t>& weightsBacking); + + /// Reference to the static range visitor used to retrieve the quantization ranges + const RangeTracker& m_Ranges; + + /// Quantized version of the model we are building up + INetworkPtr m_QuantizedNetwork; + + /// Mapping from input network guids to quantized network guids + std::unordered_map<LayerGuid, LayerGuid> m_OriginalToQuantizedGuidMap; + + /// Mapping from guid to layer in quantized network + std::unordered_map<LayerGuid, IConnectableLayer*> m_QuantizedGuidToLayerMap; + + const IQuantizationScheme* m_QuantizationScheme; + + const bool m_PreserveType; +}; + +} //namespace armnn
\ No newline at end of file diff --git a/src/armnn/QuantizerVisitor.cpp b/src/armnn/QuantizerVisitor.cpp deleted file mode 100644 index 0e9d22463f..0000000000 --- a/src/armnn/QuantizerVisitor.cpp +++ /dev/null @@ -1,589 +0,0 @@ -// -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "Network.hpp" -#include "NetworkQuantizerUtils.hpp" -#include "QuantizerVisitor.hpp" -#include "StaticRangeVisitor.hpp" - -#include <armnn/utility/NumericCast.hpp> -#include <armnn/utility/PolymorphicDowncast.hpp> - -namespace armnn -{ - -QuantizerVisitor::QuantizerVisitor(const RangeTracker& rangeTracker, - const IQuantizationScheme* quantizationScheme, - bool preserveType) - : m_Ranges(rangeTracker) - , m_QuantizedNetwork(INetwork::Create()) - , m_QuantizationScheme(quantizationScheme) - , m_PreserveType(preserveType) -{ -} - -void QuantizerVisitor::SetQuantizedInputConnections(const IConnectableLayer* srcLayer, - IConnectableLayer* quantizedLayer) -{ - ARMNN_ASSERT(srcLayer); - for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++) - { - const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i); - const InputSlot* inputSlot = PolymorphicDowncast<const InputSlot*>(&srcInputSlot); - ARMNN_ASSERT(inputSlot); - const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); - - ARMNN_ASSERT(outputSlot); - unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); - Layer& layerToFind = outputSlot->GetOwningLayer(); - - auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); - if (found == m_OriginalToQuantizedGuidMap.end()) - { - // Error in graph traversal order - ARMNN_ASSERT_MSG(false, "Error in graph traversal"); - return; - } - - // Connect the slots in the quantized model - IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second]; - IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i); - IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx); - newOutputSlot.Connect(newInputSlot); - TensorInfo info(outputSlot->GetTensorInfo()); - - // Only try to set quantization params on tensors that can be quantized - if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean && - inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 && - inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64) - { - // Fetch the min/max ranges that were computed earlier - auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); - OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); - info.SetDataType(m_QuantizationScheme->GetDataType()); - info.SetQuantizationOffset(qParams.second); - info.SetQuantizationScale(qParams.first); - } - newOutputSlot.SetTensorInfo(info); - } -} - -ConstTensor QuantizerVisitor::CreateQuantizedBias(const IConnectableLayer* srcLayer, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - std::vector<int32_t>& backing) -{ - ARMNN_ASSERT(srcLayer); - const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0); - auto inputSlot = PolymorphicDowncast<const InputSlot*>(&srcInputSlot); - ARMNN_ASSERT(inputSlot); - const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); - - ARMNN_ASSERT(outputSlot); - unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); - Layer& layerToFind = outputSlot->GetOwningLayer(); - - auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); - if (found == 
m_OriginalToQuantizedGuidMap.end()) - { - // Error in graph traversal order - ARMNN_ASSERT_MSG(false, "Error in graph traversal"); - return biases.value(); - } - - // Fetch the min/max ranges that were computed earlier - auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); - OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); - - // Get the quantization scale based on input and weight scale - float scale = qParams.first * weights.GetInfo().GetQuantizationScale(); - - // Set up quantized bias tensor info and allocate space - TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0); - backing.resize(biases.value().GetInfo().GetNumElements()); - - // Convert values to int32 - for (size_t i = 0; i < backing.size(); ++i) - { - float fp32Value = static_cast<const float*>(biases.value().GetMemoryArea())[i]; - backing[i] = armnn::numeric_cast<int32_t>(fp32Value * ( 1 / scale )); - } - - return ConstTensor(qInfo, backing); -} - -void QuantizerVisitor::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer) -{ - m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid())); - m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer)); -} - -void QuantizerVisitor::VisitAbsLayer(const IConnectableLayer* layer, const char* name) -{ - VisitElementwiseUnaryLayer(layer, ElementwiseUnaryDescriptor(UnaryOperation::Abs), name); -} - -void QuantizerVisitor::VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddAdditionLayer(name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& argMinMaxDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name) -{ - std::vector<uint8_t> meanBacking; - ConstTensor qMean = CreateQuantizedConst(mean, meanBacking); - - std::vector<uint8_t> varianceBacking; - ConstTensor qVariance = CreateQuantizedConst(variance, varianceBacking); - - std::vector<uint8_t> betaBacking; - ConstTensor qBeta = CreateQuantizedConst(beta, betaBacking); - - std::vector<uint8_t> gammaBacking; - ConstTensor qGamma = CreateQuantizedConst(gamma, gammaBacking); - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(desc, - qMean, - qVariance, - qBeta, - qGamma, - name); - - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = 
m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitComparisonLayer(const IConnectableLayer* layer, - const ComparisonDescriptor& comparisonDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitConcatLayer(const IConnectableLayer* layer, - const OriginsDescriptor& originsDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name) -{ - std::vector<uint8_t> inputBacking; - ConstTensor qInput = CreateQuantizedConst(input, inputBacking); - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name); - RecordLayer(layer, newLayer); -} - -void QuantizerVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - std::vector<uint8_t> weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking); - Optional<ConstTensor> optionalQBiases; - std::vector<int32_t> biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional<ConstTensor>(qBiases); - } - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor, - qWeights, - optionalQBiases, - name); - - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitDepthToSpaceLayer(const IConnectableLayer* layer, - const DepthToSpaceDescriptor& descriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(descriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - std::vector<uint8_t> weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking); - Optional<ConstTensor> optionalQBiases; - std::vector<int32_t> biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional<ConstTensor>(qBiases); - } - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer(desc, - qWeights, - optionalQBiases, - name); - - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitElementwiseUnaryLayer(const IConnectableLayer* layer, - const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitFillLayer(const 
IConnectableLayer* layer, - const FillDescriptor& desc, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddFillLayer(desc, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitFullyConnectedLayer(const IConnectableLayer *layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name) -{ - std::vector<uint8_t> weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking); - Optional<ConstTensor> optionalQBiases; - std::vector<int32_t> biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional<ConstTensor>(qBiases); - } - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(desc, - qWeights, - optionalQBiases, - name); - - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitInputLayer(const IConnectableLayer *layer, LayerBindingId id, const char *name) -{ - const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType(); - IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name); - - if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) - { - IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer(); - inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0)); - inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo()); - RecordLayer(layer, quantizeLayer); - } - else - { - RecordLayer(layer, inputLayer); - } -} - -void QuantizerVisitor::VisitInstanceNormalizationLayer(const IConnectableLayer* layer, - const InstanceNormalizationDescriptor& descriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddInstanceNormalizationLayer(descriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitLogSoftmaxLayer(const IConnectableLayer* layer, - const LogSoftmaxDescriptor& logSoftmaxDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitMeanLayer(const IConnectableLayer* layer, - const MeanDescriptor& meanDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitMultiplicationLayer(const IConnectableLayer* layer, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitNormalizationLayer(const armnn::IConnectableLayer* layer, - const armnn::NormalizationDescriptor& normalizationDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) -{ - const TensorInfo& info = 
layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - const DataType& dataType = info.GetDataType(); - IConnectableLayer* outputLayer = m_QuantizedNetwork->AddOutputLayer(id, name); - - if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) - { - IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer(); - RecordLayer(layer, dequantizeLayer); - SetQuantizedInputConnections(layer, dequantizeLayer); - dequantizeLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info); - } - else - { - RecordLayer(layer, outputLayer); - SetQuantizedInputConnections(layer, outputLayer); - } -} - -void QuantizerVisitor::VisitPadLayer(const IConnectableLayer* layer, - const PadDescriptor& padDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitPreluLayer(const IConnectableLayer* layer, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddPreluLayer(name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeBilinearDescriptor, - const char* name) -{ - ResizeDescriptor resizeDescriptor; - resizeDescriptor.m_Method = ResizeMethod::Bilinear; - resizeDescriptor.m_TargetWidth = resizeBilinearDescriptor.m_TargetWidth; - resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight; - resizeDescriptor.m_DataLayout = resizeBilinearDescriptor.m_DataLayout; - - VisitResizeLayer(layer, resizeDescriptor, name); -} - -void QuantizerVisitor::VisitResizeLayer(const IConnectableLayer* layer, - const ResizeDescriptor& resizeDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitRsqrtLayer(const IConnectableLayer* layer, const char* name) -{ - VisitElementwiseUnaryLayer(layer, ElementwiseUnaryDescriptor(UnaryOperation::Rsqrt), name); -} - -void QuantizerVisitor::VisitSliceLayer(const IConnectableLayer* layer, - const SliceDescriptor& sliceDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name); - RecordLayer(layer, newLayer); - 
SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitSpaceToDepthLayer(const IConnectableLayer* layer, - const SpaceToDepthDescriptor& spaceToDepthDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitStackLayer(const IConnectableLayer* layer, - const StackDescriptor& stackDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitSubtractionLayer(const IConnectableLayer* layer, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddSubtractionLayer(name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitTransposeConvolution2dLayer(const IConnectableLayer* layer, - const TransposeConvolution2dDescriptor& descriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - // quantize weights - std::vector<uint8_t> weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(weights, weightsBacking); - - // quantize biases - std::vector<int32_t> biasesBacking; - Optional<ConstTensor> optionalQBiases; - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional<ConstTensor>(qBiases); - } - - IConnectableLayer* newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(descriptor, - qWeights, - optionalQBiases, - name); - - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -void QuantizerVisitor::VisitTransposeLayer(const IConnectableLayer* layer, - const TransposeDescriptor& transposeDescriptor, - const char* name) -{ - IConnectableLayer* newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name); - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -} //namespace 
armnn diff --git a/src/armnn/QuantizerVisitor.hpp b/src/armnn/QuantizerVisitor.hpp deleted file mode 100644 index 65bd67101e..0000000000 --- a/src/armnn/QuantizerVisitor.hpp +++ /dev/null @@ -1,231 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/LayerVisitorBase.hpp" -#include "StaticRangeVisitor.hpp" -#include "NetworkQuantizationScheme.hpp" - -#include <armnn/INetwork.hpp> -#include <armnn/Types.hpp> -#include <armnnQuantizer/INetworkQuantizer.hpp> - -#include <unordered_map> - -namespace armnn -{ - -// Forward declaration -class StaticRangeVisitor; - -/// Visitor object for quantizing layers in a network -class QuantizerVisitor : public LayerVisitorBase<VisitorThrowingPolicy> -{ -public: - QuantizerVisitor(const RangeTracker& rangeTracker, - const IQuantizationScheme* quantizationScheme, - bool preserveType = false); - - ~QuantizerVisitor() = default; - - /// Functions to quantize the individual layers, overridden from ILayerVisitor - ARMNN_DEPRECATED_MSG("Use VisitElementwiseUnaryLayer instead") - void VisitAbsLayer(const IConnectableLayer* layer, const char* name = nullptr) override; - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name = nullptr) override; - - void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override; - - void VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& argMinMaxDescriptor, - const char* name = nullptr) override; - - void VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name = nullptr) override; - - void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name = nullptr) override; - - void VisitComparisonLayer(const IConnectableLayer* layer, - const ComparisonDescriptor& comparisonDescriptor, - const char* name = nullptr) override; - - void VisitConcatLayer(const IConnectableLayer* layer, - const OriginsDescriptor& originsDescriptor, - const char* name = nullptr) override; - - void VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name = nullptr) override; - - void VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitDepthToSpaceLayer(const IConnectableLayer* layer, - const DepthToSpaceDescriptor& depthToSpaceDescriptor, - const char* name = nullptr) override; - - void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitElementwiseUnaryLayer(const IConnectableLayer* layer, - const ElementwiseUnaryDescriptor& elementwiseUnaryDescriptor, - const char* name = nullptr) override; - - void VisitFillLayer(const IConnectableLayer* layer, - const FillDescriptor& desc, - const char* name) override; - - void VisitFullyConnectedLayer(const IConnectableLayer *layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char 
*name = nullptr) override; - - void VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override; - - void VisitInstanceNormalizationLayer(const IConnectableLayer* layer, - const InstanceNormalizationDescriptor& instanceNormalizationDescriptor, - const char* name = nullptr) override; - - void VisitLogSoftmaxLayer(const IConnectableLayer* layer, - const LogSoftmaxDescriptor& logSoftmaxDescriptor, - const char* name = nullptr) override; - - void VisitMeanLayer(const IConnectableLayer* layer, - const MeanDescriptor& meanDescriptor, - const char* name = nullptr) override; - - void VisitMultiplicationLayer(const IConnectableLayer* layer, - const char* name = nullptr) override; - - void VisitNormalizationLayer(const IConnectableLayer* layer, - const NormalizationDescriptor& normalizationDescriptor, - const char* name = nullptr) override; - - void VisitOutputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override; - - void VisitPadLayer(const IConnectableLayer*, - const PadDescriptor&, - const char* name = nullptr) override; - - void VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name = nullptr) override; - - void VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name = nullptr) override; - - void VisitPreluLayer(const IConnectableLayer* layer, - const char* name = nullptr) override; - - void VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name = nullptr) override; - - void VisitResizeLayer(const IConnectableLayer* layer, - const ResizeDescriptor& resizeDescriptor, - const char* name = nullptr) override; - - ARMNN_DEPRECATED_MSG("Use VisitResizeLayer instead") - void VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeDesc, - const char* name = nullptr) override; - - ARMNN_DEPRECATED_MSG("Use VisitElementwiseUnaryLayer instead") - void VisitRsqrtLayer(const IConnectableLayer*, - const char* name = nullptr) override; - - void VisitSliceLayer(const IConnectableLayer* layer, - const SliceDescriptor& sliceDescriptor, - const char* name = nullptr) override; - - void VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name = nullptr) override; - - void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name = nullptr) override; - - void VisitSpaceToDepthLayer(const IConnectableLayer* layer, - const SpaceToDepthDescriptor& spaceToDepthDescriptor, - const char* name = nullptr) override; - - void VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name = nullptr) override; - - void VisitStackLayer(const IConnectableLayer* layer, - const StackDescriptor& stackDescriptor, - const char* name = nullptr) override; - - void VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name = nullptr) override; - - void VisitSubtractionLayer(const IConnectableLayer* layer, - const char* name = nullptr) override; - - void VisitTransposeConvolution2dLayer(const IConnectableLayer* layer, - const TransposeConvolution2dDescriptor& descriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; 
- - void VisitTransposeLayer(const IConnectableLayer* layer, - const TransposeDescriptor& descriptor, - const char* name = nullptr) override; - - /// Extract the quantized network - INetworkPtr RetrieveFinalNetwork() { return std::move(m_QuantizedNetwork); } - -private: - /// Connects the layer to preceeding layers and sets the quantization parameters based on recorded ranges - void SetQuantizedInputConnections(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer); - - /// Record the guids so we can easily find the layers later - void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer); - - /// Sets the bias quantization scale based on input and weight scales - ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - std::vector<int32_t>& weightsBacking); - - /// Reference to the static range visitor used to retrieve the quantization ranges - const RangeTracker& m_Ranges; - - /// Quantized version of the model we are building up - INetworkPtr m_QuantizedNetwork; - - /// Mapping from input network guids to quantized network guids - std::unordered_map<LayerGuid, LayerGuid> m_OriginalToQuantizedGuidMap; - - /// Mapping from guid to layer in quantized network - std::unordered_map<LayerGuid, IConnectableLayer*> m_QuantizedGuidToLayerMap; - - const IQuantizationScheme* m_QuantizationScheme; - - const bool m_PreserveType; -}; - -} //namespace armnn diff --git a/src/armnn/StaticRangeStrategy.cpp b/src/armnn/StaticRangeStrategy.cpp new file mode 100644 index 0000000000..84b8d24068 --- /dev/null +++ b/src/armnn/StaticRangeStrategy.cpp @@ -0,0 +1,193 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "StaticRangeStrategy.hpp" + +#include <armnn/utility/IgnoreUnused.hpp> +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> + +#include <limits> + +namespace armnn +{ + +StaticRangeStrategy::StaticRangeStrategy(RangeTracker& rangeTracker) + : m_RangeTracker(rangeTracker) +{} + +void StaticRangeStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) +{ + m_RangeTracker.SetRange(layer, outputIdx, min, max); +} + +void StaticRangeStrategy::ForwardParentParameters(const IConnectableLayer* layer) +{ + const auto parentRange = m_RangeTracker.GetRange(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), 0); + SetRange(layer, 0, parentRange.first, parentRange.second); +} + + +void StaticRangeStrategy::ExecuteStrategy(const armnn::IConnectableLayer *layer, + const BaseDescriptor &descriptor, + const std::vector<armnn::ConstTensor> &constants, + const char *name, + const armnn::LayerBindingId id) +{ +IgnoreUnused(id, name); + +switch (layer->GetType()) +{ + case armnn::LayerType::Activation : + { + const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor); + + switch (activationDescriptor.m_Function) + { + // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu + case ActivationFunction::Abs: + case ActivationFunction::Linear: + case ActivationFunction::ReLu: + case ActivationFunction::SoftReLu: + SetRange(layer, 0, 0.f, 15.f); + break; + case ActivationFunction::BoundedReLu: + SetRange(layer, 0, 0.f, activationDescriptor.m_A); + break; + case ActivationFunction::TanH: + SetRange(layer, 0, -1.f, 1.f); + break; + case ActivationFunction::LeakyReLu: + SetRange(layer, 0, -5.f, 15.f); + break; + default: + SetRange(layer, 0, 
-15.f, 15.f); + break; + } + break; + } + case armnn::LayerType::Addition : + { + SetRange(layer, 0, -20.f, 20.f); + break; + } + case armnn::LayerType::ArgMinMax : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::BatchToSpaceNd : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::BatchNormalization : + { + SetRange(layer, 0, -15.0f, 15.0f); + break; + } + case armnn::LayerType::Concat : + { + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) + { + const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); + LayerGuid layerId = outputSlot->GetOwningLayerGuid(); + unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); + RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); + min = std::min(min, range.first); + max = std::max(max, range.second); + } + SetRange(layer, 0, min, max); + break; + } + case armnn::LayerType::Constant : + { + + if (constants[0].GetDataType() != DataType::Float32) + { + throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); + } + + // Work out the range based on the input constants + unsigned int inputNumElements = constants[0].GetNumElements(); + const float* inputData = reinterpret_cast<const float*>(constants[0].GetMemoryArea()); + + float min = std::numeric_limits<float>::max(); + float max = std::numeric_limits<float>::lowest(); + + for (unsigned int i = 0; i < inputNumElements; i++) + { + const float inputValue = inputData[i]; + + min = std::min(min, inputValue); + max = std::max(max, inputValue); + } + SetRange(layer, 0, min, max); + break; + } + case armnn::LayerType::Convolution2d : + { + SetRange(layer, 0, -15.0f, 15.0f); + break; + } + case armnn::LayerType::DepthwiseConvolution2d : + { + SetRange(layer, 0, -15.0f, 15.0f); + break; + } + case armnn::LayerType::FullyConnected : + { + SetRange(layer, 0, -15.0f, 15.0f); + break; + } + case armnn::LayerType::Permute : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::Pooling2d : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::Reshape : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::Resize : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::Splitter : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::SpaceToBatchNd : + { + ForwardParentParameters(layer); + break; + } + case armnn::LayerType::Softmax : + { + SetRange(layer, 0, 0.f, 1.f); + break; + } + case armnn::LayerType::StridedSlice : + { + ForwardParentParameters(layer); + break; + } + default: + { + } +} +} + +} //namespace armnn diff --git a/src/armnn/StaticRangeStrategy.hpp b/src/armnn/StaticRangeStrategy.hpp new file mode 100644 index 0000000000..ed7cf274fe --- /dev/null +++ b/src/armnn/StaticRangeStrategy.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "armnn/LayerVisitorBase.hpp" +#include "RangeTracker.hpp" + +#include <armnn/INetwork.hpp> +#include <armnnQuantizer/INetworkQuantizer.hpp> + + +namespace armnn +{ + +class StaticRangeStrategy : public IStrategy +{ +public: + StaticRangeStrategy(RangeTracker& rangeTracker); + ~StaticRangeStrategy() = default; + + void ExecuteStrategy(const armnn::IConnectableLayer *layer, + const BaseDescriptor &descriptor, + const std::vector<armnn::ConstTensor> &constants, + const char *name, + const armnn::LayerBindingId id) override; + +private: + /// Set the range for an output slot on a layer + void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); + + void ForwardParentParameters(const IConnectableLayer* layer); + + /// Mapping from a layer Guid to an array of ranges for outputs + RangeTracker& m_RangeTracker; + +}; + +} //namespace armnn diff --git a/src/armnn/StaticRangeVisitor.cpp b/src/armnn/StaticRangeVisitor.cpp deleted file mode 100644 index 210c666739..0000000000 --- a/src/armnn/StaticRangeVisitor.cpp +++ /dev/null @@ -1,270 +0,0 @@ -// -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "StaticRangeVisitor.hpp" - -#include <armnn/utility/IgnoreUnused.hpp> -#include <armnn/Descriptors.hpp> -#include <armnn/Types.hpp> - -#include <limits> - -namespace armnn -{ - -StaticRangeVisitor::StaticRangeVisitor(RangeTracker& rangeTracker) - : m_RangeTracker(rangeTracker) -{} - -void StaticRangeVisitor::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) -{ - m_RangeTracker.SetRange(layer, outputIdx, min, max); -} - -void StaticRangeVisitor::ForwardParentParameters(const IConnectableLayer* layer) -{ - const auto parentRange = m_RangeTracker.GetRange(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), 0); - SetRange(layer, 0, parentRange.first, parentRange.second); -} - -void StaticRangeVisitor::VisitAdditionLayer(const IConnectableLayer* layer, const char* name) -{ - IgnoreUnused(name); - SetRange(layer, 0, -20.f, 20.f); -} - -void StaticRangeVisitor::VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name) -{ - IgnoreUnused(desc); - IgnoreUnused(mean); - IgnoreUnused(variance); - IgnoreUnused(beta); - IgnoreUnused(gamma); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); -} - -void StaticRangeVisitor::VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - IgnoreUnused(convolution2dDescriptor); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); -} - -void StaticRangeVisitor::VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - IgnoreUnused(desc); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); -} - -void StaticRangeVisitor::VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name) -{ - IgnoreUnused(name); - switch 
(activationDescriptor.m_Function) - { - // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu - case ActivationFunction::Abs: - case ActivationFunction::Linear: - case ActivationFunction::ReLu: - case ActivationFunction::SoftReLu: - SetRange(layer, 0, 0.f, 15.f); - break; - case ActivationFunction::BoundedReLu: - SetRange(layer, 0, 0.f, activationDescriptor.m_A); - break; - case ActivationFunction::TanH: - SetRange(layer, 0, -1.f, 1.f); - break; - case ActivationFunction::LeakyReLu: - SetRange(layer, 0, -5.f, 15.f); - break; - default: - SetRange(layer, 0, -15.f, 15.f); - break; - } -} - -void StaticRangeVisitor::VisitArgMinMaxLayer(const armnn::IConnectableLayer* layer, - const armnn::ArgMinMaxDescriptor& argMinMaxDescriptor, - const char* name) -{ - IgnoreUnused(argMinMaxDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitFullyConnectedLayer(const IConnectableLayer* layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name) -{ - IgnoreUnused(desc); - IgnoreUnused(weights); - IgnoreUnused(biases); - IgnoreUnused(name); - SetRange(layer, 0, -15.0f, 15.0f); -} - -void StaticRangeVisitor::VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name) -{ - IgnoreUnused(permuteDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name) -{ - IgnoreUnused(spaceToBatchNdDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name) -{ - IgnoreUnused(pooling2dDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name) -{ - IgnoreUnused(softmaxDescriptor); - IgnoreUnused(name); - SetRange(layer, 0, 0.f, 1.f); -} - -void StaticRangeVisitor::VisitConcatLayer(const IConnectableLayer* layer, - const OriginsDescriptor& originsDescriptor, - const char* name) -{ - IgnoreUnused(originsDescriptor); - IgnoreUnused(name); - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid layerId = outputSlot->GetOwningLayerGuid(); - unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); - RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); - min = std::min(min, range.first); - max = std::max(max, range.second); - } - SetRange(layer, 0, min, max); -} - -void StaticRangeVisitor::VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name) -{ - IgnoreUnused(name); - - if (input.GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); - } - - // Work out the range based on the input constants - unsigned int inputNumElements = input.GetNumElements(); - const float* inputData = reinterpret_cast<const float*>(input.GetMemoryArea()); - - float min = std::numeric_limits<float>::max(); - float max = std::numeric_limits<float>::lowest(); 
- - for (unsigned int i = 0; i < inputNumElements; i++) - { - const float inputValue = inputData[i]; - - min = std::min(min, inputValue); - max = std::max(max, inputValue); - } - SetRange(layer, 0, min, max); -} - -void StaticRangeVisitor::VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name) -{ - IgnoreUnused(reshapeDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name) -{ - IgnoreUnused(splitterDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeDesc, - const char* name) -{ - IgnoreUnused(resizeDesc); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitResizeLayer(const IConnectableLayer* layer, - const ResizeDescriptor& resizeDescriptor, - const char* name) -{ - IgnoreUnused(resizeDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name) -{ - IgnoreUnused(stridedSliceDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -void StaticRangeVisitor::VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name) -{ - IgnoreUnused(batchToSpaceNdDescriptor); - IgnoreUnused(name); - ForwardParentParameters(layer); -} - -} //namespace armnn diff --git a/src/armnn/StaticRangeVisitor.hpp b/src/armnn/StaticRangeVisitor.hpp deleted file mode 100644 index 20e3cb0292..0000000000 --- a/src/armnn/StaticRangeVisitor.hpp +++ /dev/null @@ -1,120 +0,0 @@ -// -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/LayerVisitorBase.hpp" -#include "RangeTracker.hpp" - -#include <armnn/INetwork.hpp> -#include <armnnQuantizer/INetworkQuantizer.hpp> - - -namespace armnn -{ - -/// Visitor class to establish min/max ranges based on the type of the layer -class StaticRangeVisitor : public LayerVisitorBase<VisitorNoThrowPolicy> -{ -public: - StaticRangeVisitor(RangeTracker& rangeTracker); - ~StaticRangeVisitor() = default; - - /// Functions to set the Range on a per-layer-type basis - void VisitAdditionLayer(const IConnectableLayer* layer, const char* name = nullptr) override; - - void VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& desc, - const char* name = nullptr) override; - - void VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name = nullptr) override; - - void VisitConvolution2dLayer(const IConnectableLayer* layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitDepthwiseConvolution2dLayer(const IConnectableLayer* layer, - const DepthwiseConvolution2dDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override; - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& activationDescriptor, - const char* name = nullptr) override; - - void VisitFullyConnectedLayer(const IConnectableLayer *layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name) override; - - void VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& permuteDescriptor, - const char* name) override; - - void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name = nullptr) override; - - void VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& pooling2dDescriptor, - const char* name) override; - - void VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& softmaxDescriptor, - const char* name = nullptr) override; - - void VisitConcatLayer(const IConnectableLayer* layer, - const OriginsDescriptor& originsDescriptor, - const char* name = nullptr) override; - - void VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name = nullptr) override; - - void VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name = nullptr) override; - - void VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& splitterDescriptor, - const char* name = nullptr) override; - - void VisitResizeBilinearLayer(const IConnectableLayer* layer, - const ResizeBilinearDescriptor& resizeDesc, - const char* name = nullptr) override; - - void VisitResizeLayer(const IConnectableLayer* layer, - const ResizeDescriptor& resizeDescriptor, - const char* name = nullptr) override; - - void VisitStridedSliceLayer(const IConnectableLayer* layer, - const StridedSliceDescriptor& stridedSliceDescriptor, - const char* name = nullptr) override; - - void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer, - const 
BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor, - const char* name = nullptr) override; - -private: - /// Set the range for an output slot on a layer - void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); - - void ForwardParentParameters(const IConnectableLayer* layer); - - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_RangeTracker; -}; - -} //namespace armnn diff --git a/src/armnn/layers/BatchNormalizationLayer.cpp b/src/armnn/layers/BatchNormalizationLayer.cpp index ce351a4376..6df5195a55 100644 --- a/src/armnn/layers/BatchNormalizationLayer.cpp +++ b/src/armnn/layers/BatchNormalizationLayer.cpp @@ -80,4 +80,14 @@ void BatchNormalizationLayer::Accept(ILayerVisitor& visitor) const this, GetParameters(), meanTensor, varianceTensor, betaTensor, gammaTensor, GetName()); } +void BatchNormalizationLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Mean->GetTensorInfo(), m_Mean->Map(true)}, + {m_Variance->GetTensorInfo(), m_Variance->Map(true)}, + {m_Beta->GetTensorInfo(), m_Beta->Map(true)}, + {m_Gamma->GetTensorInfo(), m_Gamma->Map(true)} }; + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/BatchNormalizationLayer.hpp b/src/armnn/layers/BatchNormalizationLayer.hpp index 3915897a52..dab75d1e12 100644 --- a/src/armnn/layers/BatchNormalizationLayer.hpp +++ b/src/armnn/layers/BatchNormalizationLayer.hpp @@ -41,6 +41,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a BatchNormalizationLayer. /// @param [in] param BatchNormalizationDescriptor to configure the batch normalization operation. 
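The ExecuteStrategy overrides added to the layer classes below all follow the pattern just shown for batch normalization: pack the layer's constant tensors into a vector and hand everything to the strategy. A trivial IStrategy that consumes that interface is sketched here; the include paths and the IStrategy signature are taken from the headers quoted in this patch and should be treated as assumptions.

    #include <armnn/Descriptors.hpp>
    #include <armnn/IStrategy.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/Types.hpp>
    #include <armnn/utility/IgnoreUnused.hpp>

    #include <iostream>
    #include <vector>

    // Counts the constant data each layer carries, e.g. the four
    // mean/variance/beta/gamma tensors packed by BatchNormalizationLayer.
    class ConstantByteCounter : public armnn::IStrategy
    {
    public:
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id) override
        {
            armnn::IgnoreUnused(layer, descriptor, id);

            unsigned int bytes = 0;
            for (const armnn::ConstTensor& tensor : constants)
            {
                bytes += tensor.GetNumBytes();
            }
            std::cout << (name ? name : "<unnamed>") << ": "
                      << constants.size() << " constant tensor(s), "
                      << bytes << " byte(s)" << std::endl;
        }
    };

Running such a strategy over a network prints one line per layer, in the same traversal order the quantizer relies on.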
diff --git a/src/armnn/layers/ConstantLayer.cpp b/src/armnn/layers/ConstantLayer.cpp index 76b9997cfe..31e9e974cf 100644 --- a/src/armnn/layers/ConstantLayer.cpp +++ b/src/armnn/layers/ConstantLayer.cpp @@ -68,4 +68,10 @@ void ConstantLayer::Accept(ILayerVisitor& visitor) const visitor.VisitConstantLayer(this, layerOutputTensor, GetName()); } +void ConstantLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_LayerOutput->GetTensorInfo(), m_LayerOutput->Map(true)} }; + strategy.ExecuteStrategy(this, BaseDescriptor(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/ConstantLayer.hpp b/src/armnn/layers/ConstantLayer.hpp index 36fa1f96e9..9d91551df9 100644 --- a/src/armnn/layers/ConstantLayer.hpp +++ b/src/armnn/layers/ConstantLayer.hpp @@ -41,6 +41,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + std::unique_ptr<ScopedCpuTensorHandle> m_LayerOutput; protected: diff --git a/src/armnn/layers/Convolution2dLayer.cpp b/src/armnn/layers/Convolution2dLayer.cpp index 18557bf64e..0c3040ea6e 100644 --- a/src/armnn/layers/Convolution2dLayer.cpp +++ b/src/armnn/layers/Convolution2dLayer.cpp @@ -157,4 +157,16 @@ void Convolution2dLayer::Accept(ILayerVisitor& visitor) const visitor.VisitConvolution2dLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName()); } +void Convolution2dLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Weight->GetTensorInfo(), m_Weight->Map(true)} }; + + if (GetParameters().m_BiasEnabled) + { + constTensors.emplace_back(ConstTensor(m_Bias->GetTensorInfo(), m_Bias->Map(true))); + } + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/Convolution2dLayer.hpp b/src/armnn/layers/Convolution2dLayer.hpp index 4dd1497fd8..440c80dfa9 100644 --- a/src/armnn/layers/Convolution2dLayer.hpp +++ b/src/armnn/layers/Convolution2dLayer.hpp @@ -44,6 +44,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + void SerializeLayerParameters(ParameterStringifyFunction& fn) const override; protected: diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp index ff9cebafd5..1871b7d15d 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.cpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.cpp @@ -165,4 +165,16 @@ void DepthwiseConvolution2dLayer::Accept(ILayerVisitor& visitor) const visitor.VisitDepthwiseConvolution2dLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName()); } +void DepthwiseConvolution2dLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Weight->GetTensorInfo(), m_Weight->Map(true)} }; + + if (GetParameters().m_BiasEnabled) + { + constTensors.emplace_back(ConstTensor(m_Bias->GetTensorInfo(), m_Bias->Map(true))); + } + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp index dd0b0e6b88..7388cbcd8e 100644 --- a/src/armnn/layers/DepthwiseConvolution2dLayer.hpp +++ b/src/armnn/layers/DepthwiseConvolution2dLayer.hpp @@ -43,6 +43,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void 
ExecuteStrategy(IStrategy& strategy) const override; + void SerializeLayerParameters(ParameterStringifyFunction& fn) const override; protected: diff --git a/src/armnn/layers/DetectionPostProcessLayer.cpp b/src/armnn/layers/DetectionPostProcessLayer.cpp index d54bf26c40..356377a2f5 100644 --- a/src/armnn/layers/DetectionPostProcessLayer.cpp +++ b/src/armnn/layers/DetectionPostProcessLayer.cpp @@ -84,4 +84,11 @@ void DetectionPostProcessLayer::Accept(ILayerVisitor& visitor) const visitor.VisitDetectionPostProcessLayer(this, GetParameters(), anchorTensor, GetName()); } +void DetectionPostProcessLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Anchors->GetTensorInfo(), m_Anchors->GetConstTensor<void>()} }; + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/DetectionPostProcessLayer.hpp b/src/armnn/layers/DetectionPostProcessLayer.hpp index 374eef5ec5..b0d58589b4 100644 --- a/src/armnn/layers/DetectionPostProcessLayer.hpp +++ b/src/armnn/layers/DetectionPostProcessLayer.hpp @@ -36,6 +36,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a DetectionPostProcessLayer. /// @param [in] param DetectionPostProcessDescriptor to configure the detection postprocess. diff --git a/src/armnn/layers/ElementwiseBaseLayer.cpp b/src/armnn/layers/ElementwiseBaseLayer.cpp index 631e08c2ac..a169d31b2d 100644 --- a/src/armnn/layers/ElementwiseBaseLayer.cpp +++ b/src/armnn/layers/ElementwiseBaseLayer.cpp @@ -82,4 +82,9 @@ void ElementwiseBaseLayer::ValidateTensorShapesFromInputs() ValidateAndCopyShape(outputShape, inferredShapes[0], m_ShapeInferenceMethod, GetLayerTypeAsCString(GetType())); } +void ElementwiseBaseLayer::ExecuteStrategy(IStrategy& strategy) const +{ + strategy.ExecuteStrategy(this, BaseDescriptor(), {}, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/ElementwiseBaseLayer.hpp b/src/armnn/layers/ElementwiseBaseLayer.hpp index 3893dcd9f9..17e8b446e0 100644 --- a/src/armnn/layers/ElementwiseBaseLayer.hpp +++ b/src/armnn/layers/ElementwiseBaseLayer.hpp @@ -27,6 +27,8 @@ public: /// @return A vector to the inferred output shape. std::vector<TensorShape> InferOutputShapes(const std::vector<TensorShape>& inputShapes) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// @param numInputSlots The number of input slots for the layer. /// @param numOutputSlots The number of output slots for the layer. 
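The weighted layers in this patch (Convolution2d and DepthwiseConvolution2d above, FullyConnected below) all pack their constants positionally: constants[0] holds the weights and constants[1], present only when m_BiasEnabled is set, holds the bias. A hypothetical helper showing how a consuming strategy unpacks that convention, written for Convolution2d only and not part of the patch:

    #include <armnn/Descriptors.hpp>
    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>

    #include <vector>

    // Unpack the positional (weights, optional bias) convention
    // established by the ExecuteStrategy overrides above.
    armnn::Optional<armnn::ConstTensor> GetOptionalBias(
        const armnn::Convolution2dDescriptor& desc,
        const std::vector<armnn::ConstTensor>& constants)
    {
        // constants[0] is always the weights tensor; the bias is
        // appended only when the descriptor enabled it.
        if (desc.m_BiasEnabled && constants.size() > 1)
        {
            return armnn::Optional<armnn::ConstTensor>(constants[1]);
        }
        return armnn::EmptyOptional();
    }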
diff --git a/src/armnn/layers/FakeQuantizationLayer.cpp b/src/armnn/layers/FakeQuantizationLayer.cpp index a316b2b82a..102a6725a7 100644 --- a/src/armnn/layers/FakeQuantizationLayer.cpp +++ b/src/armnn/layers/FakeQuantizationLayer.cpp @@ -52,4 +52,10 @@ void FakeQuantizationLayer::Accept(ILayerVisitor& visitor) const throw armnn::Exception("FakeQuantizationLayer should not appear in an input graph"); } +void FakeQuantizationLayer::ExecuteStrategy(IStrategy& strategy) const +{ + IgnoreUnused(strategy); + throw armnn::Exception("FakeQuantizationLayer should not appear in an input graph"); +} + } // namespace armnn diff --git a/src/armnn/layers/FakeQuantizationLayer.hpp b/src/armnn/layers/FakeQuantizationLayer.hpp index 09bd530f86..78e49e6474 100644 --- a/src/armnn/layers/FakeQuantizationLayer.hpp +++ b/src/armnn/layers/FakeQuantizationLayer.hpp @@ -30,6 +30,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a FakeQuantizationLayer. /// @param [in] param FakeQuantizationDescriptor to configure the fake quantization operation. diff --git a/src/armnn/layers/FullyConnectedLayer.cpp b/src/armnn/layers/FullyConnectedLayer.cpp index ca7a0cc4bb..0e5e5942de 100644 --- a/src/armnn/layers/FullyConnectedLayer.cpp +++ b/src/armnn/layers/FullyConnectedLayer.cpp @@ -101,4 +101,16 @@ void FullyConnectedLayer::Accept(ILayerVisitor& visitor) const visitor.VisitFullyConnectedLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName()); } +void FullyConnectedLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Weight->GetTensorInfo(), m_Weight->Map(true)} }; + + if (GetParameters().m_BiasEnabled) + { + constTensors.emplace_back(ConstTensor(m_Bias->GetTensorInfo(), m_Bias->Map(true))); + } + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/FullyConnectedLayer.hpp b/src/armnn/layers/FullyConnectedLayer.hpp index bbacd2551d..4a9cbe1136 100644 --- a/src/armnn/layers/FullyConnectedLayer.hpp +++ b/src/armnn/layers/FullyConnectedLayer.hpp @@ -43,6 +43,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a FullyConnectedLayer. /// @param [in] param FullyConnectedDescriptor to configure the fully connected operation. diff --git a/src/armnn/layers/LayerWithParameters.hpp b/src/armnn/layers/LayerWithParameters.hpp index 3f3bdd8050..952eff66ff 100644 --- a/src/armnn/layers/LayerWithParameters.hpp +++ b/src/armnn/layers/LayerWithParameters.hpp @@ -48,6 +48,11 @@ protected: /// The parameters for the layer (not including tensor-valued weights etc.). 
Parameters m_Param; + + void ExecuteStrategy(IStrategy& strategy) const override + { + strategy.ExecuteStrategy(this, GetParameters(), {}, GetName()); + } }; } // namespace diff --git a/src/armnn/layers/LstmLayer.cpp b/src/armnn/layers/LstmLayer.cpp index 8e396ab70c..ebc408a636 100644 --- a/src/armnn/layers/LstmLayer.cpp +++ b/src/armnn/layers/LstmLayer.cpp @@ -480,4 +480,150 @@ void LstmLayer::Accept(ILayerVisitor& visitor) const visitor.VisitLstmLayer(this, GetParameters(), inputParams, GetName()); } +void LstmLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<ConstTensor> constTensors; + + LstmDescriptor descriptor = GetParameters(); + + // First add mandatory/basic parameters + if (m_BasicParameters.m_InputToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToForgetWeights->GetTensorInfo(), + m_BasicParameters.m_InputToForgetWeights->Map(true))); + } + if (m_BasicParameters.m_InputToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToCellWeights->GetTensorInfo(), + m_BasicParameters.m_InputToCellWeights->Map(true))); + } + if (m_BasicParameters.m_InputToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToOutputWeights->GetTensorInfo(), + m_BasicParameters.m_InputToOutputWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToForgetWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToForgetWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToCellWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToCellWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToOutputWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToOutputWeights->Map(true))); + } + if (m_BasicParameters.m_ForgetGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_ForgetGateBias->GetTensorInfo(), + m_BasicParameters.m_ForgetGateBias->Map(true))); + } + if (m_BasicParameters.m_CellBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_CellBias->GetTensorInfo(), + m_BasicParameters.m_CellBias->Map(true))); + } + if (m_BasicParameters.m_OutputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_OutputGateBias->GetTensorInfo(), + m_BasicParameters.m_OutputGateBias->Map(true))); + } + + // Add cifg parameters + if (!descriptor.m_CifgEnabled) + { + if (m_CifgParameters.m_InputToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_CifgParameters.m_InputToInputWeights->GetTensorInfo(), + m_CifgParameters.m_InputToInputWeights->Map(true))); + } + if (m_CifgParameters.m_RecurrentToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_CifgParameters.m_RecurrentToInputWeights->GetTensorInfo(), + m_CifgParameters.m_RecurrentToInputWeights->Map(true))); + } + if (m_CifgParameters.m_InputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_CifgParameters.m_InputGateBias->GetTensorInfo(), + m_CifgParameters.m_InputGateBias->Map(true))); + } + } + + // Add peephole parameters + if (descriptor.m_PeepholeEnabled) + { + if (!descriptor.m_CifgEnabled) + { + if (m_PeepholeParameters.m_CellToInputWeights != 
nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToInputWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToInputWeights->Map(true))); + } + } + if (m_PeepholeParameters.m_CellToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToForgetWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToForgetWeights->Map(true))); + } + if (m_PeepholeParameters.m_CellToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToOutputWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToOutputWeights->Map(true))); + } + } + + // Add projection parameters + if (descriptor.m_ProjectionEnabled) + { + if (m_ProjectionParameters.m_ProjectionWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_ProjectionParameters.m_ProjectionWeights->GetTensorInfo(), + m_ProjectionParameters.m_ProjectionWeights->Map(true))); + } + if (m_ProjectionParameters.m_ProjectionBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_ProjectionParameters.m_ProjectionBias->GetTensorInfo(), + m_ProjectionParameters.m_ProjectionBias->Map(true))); + } + } + + // Add norm parameters + if (descriptor.m_LayerNormEnabled) + { + if (!descriptor.m_CifgEnabled) + { + if (m_LayerNormParameters.m_InputLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_InputLayerNormWeights->Map(true))); + } + } + if (m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_ForgetLayerNormWeights->Map(true))); + } + if (m_LayerNormParameters.m_CellLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_CellLayerNormWeights->Map(true))); + } + if (m_LayerNormParameters.m_OutputLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_OutputLayerNormWeights->Map(true))); + } + } + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/LstmLayer.hpp b/src/armnn/layers/LstmLayer.hpp index 51348d7015..30f952e276 100644 --- a/src/armnn/layers/LstmLayer.hpp +++ b/src/armnn/layers/LstmLayer.hpp @@ -107,6 +107,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a LstmLayer. /// @param [in] param LstmDescriptor to configure the lstm operation. 
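The LstmLayer::ExecuteStrategy body above repeats one null-check/emplace block per optional tensor handle. A small helper along the lines below would collapse each block to a single call; AppendIfSet is a hypothetical name, not part of this patch, and it only assumes the GetTensorInfo()/Map(true) accessors already used above.

    #include <armnn/Tensor.hpp>
    #include <vector>

    // Hypothetical helper: append a ConstTensor only when the handle is set.
    // HandlePtr is any (smart) pointer type exposing GetTensorInfo() and Map(bool).
    template <typename HandlePtr>
    void AppendIfSet(std::vector<armnn::ConstTensor>& constTensors, const HandlePtr& handle)
    {
        if (handle != nullptr)
        {
            constTensors.emplace_back(armnn::ConstTensor(handle->GetTensorInfo(), handle->Map(true)));
        }
    }

Each block above would then read, for example, AppendIfSet(constTensors, m_BasicParameters.m_InputToForgetWeights);.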
diff --git a/src/armnn/layers/MemCopyLayer.cpp b/src/armnn/layers/MemCopyLayer.cpp index d9a802c23c..40c1b98012 100644 --- a/src/armnn/layers/MemCopyLayer.cpp +++ b/src/armnn/layers/MemCopyLayer.cpp @@ -55,4 +55,10 @@ void MemCopyLayer::Accept(ILayerVisitor& visitor) const throw armnn::Exception("MemCopyLayer should not appear in an input graph"); } +void MemCopyLayer::ExecuteStrategy(IStrategy& strategy) const +{ + IgnoreUnused(strategy); + throw armnn::Exception("MemCopyLayer should not appear in an input graph"); +} + } // namespace armnn diff --git a/src/armnn/layers/MemCopyLayer.hpp b/src/armnn/layers/MemCopyLayer.hpp index 996d6872d3..b913c529e5 100644 --- a/src/armnn/layers/MemCopyLayer.hpp +++ b/src/armnn/layers/MemCopyLayer.hpp @@ -30,6 +30,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a MemCopyLayer. /// @param [in] name Optional name for the layer. diff --git a/src/armnn/layers/MemImportLayer.cpp b/src/armnn/layers/MemImportLayer.cpp index 3d1c702946..c96f92bc5e 100644 --- a/src/armnn/layers/MemImportLayer.cpp +++ b/src/armnn/layers/MemImportLayer.cpp @@ -55,4 +55,10 @@ void MemImportLayer::Accept(ILayerVisitor& visitor) const throw armnn::Exception("MemImportLayer should not appear in an input graph"); } +void MemImportLayer::ExecuteStrategy(IStrategy& strategy) const +{ + IgnoreUnused(strategy); + throw armnn::Exception("MemImportLayer should not appear in an input graph"); +} + } // namespace armnn diff --git a/src/armnn/layers/MemImportLayer.hpp b/src/armnn/layers/MemImportLayer.hpp index 1cbdaac00b..47379701c7 100644 --- a/src/armnn/layers/MemImportLayer.hpp +++ b/src/armnn/layers/MemImportLayer.hpp @@ -30,6 +30,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a MemImportLayer. /// @param [in] name Optional name for the layer. 
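MemCopyLayer and MemImportLayer are inserted by the optimizer rather than by users, so a strategy applied to a user-constructed graph should never reach them; like FakeQuantizationLayer, their overrides therefore throw. A caller that might be handed an already-optimized graph can guard for this, roughly as sketched below; ApplyStrategySafely is a hypothetical wrapper, while INetwork::ExecuteStrategy is the entry point used by the reworked tests later in this patch.

    #include <armnn/Exceptions.hpp>
    #include <armnn/INetwork.hpp>
    #include <iostream>

    // Hypothetical wrapper: report the "should not appear in an input graph"
    // exceptions thrown by internal-only layers instead of propagating them.
    void ApplyStrategySafely(armnn::INetwork& network, armnn::IStrategy& strategy)
    {
        try
        {
            network.ExecuteStrategy(strategy);
        }
        catch (const armnn::Exception& e)
        {
            std::cerr << "Strategy aborted: " << e.what() << std::endl;
        }
    }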
diff --git a/src/armnn/layers/PreCompiledLayer.cpp b/src/armnn/layers/PreCompiledLayer.cpp index dbbc1fd716..75c1e46a84 100644 --- a/src/armnn/layers/PreCompiledLayer.cpp +++ b/src/armnn/layers/PreCompiledLayer.cpp @@ -55,4 +55,10 @@ void PreCompiledLayer::Accept(ILayerVisitor& visitor) const throw armnn::Exception("PreCompiledLayer should not appear in an input graph"); } +void PreCompiledLayer::ExecuteStrategy(IStrategy& strategy) const +{ + IgnoreUnused(strategy); + throw armnn::Exception("PreCompiledLayer should not appear in an input graph"); +} + } // namespace armnn diff --git a/src/armnn/layers/PreCompiledLayer.hpp b/src/armnn/layers/PreCompiledLayer.hpp index a4851c778f..2ed87578a4 100644 --- a/src/armnn/layers/PreCompiledLayer.hpp +++ b/src/armnn/layers/PreCompiledLayer.hpp @@ -35,6 +35,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + private: PreCompiledLayer(const PreCompiledLayer& other) = delete; PreCompiledLayer& operator=(const PreCompiledLayer& other) = delete; diff --git a/src/armnn/layers/QLstmLayer.cpp b/src/armnn/layers/QLstmLayer.cpp index 85f99bddf9..d957bbb485 100644 --- a/src/armnn/layers/QLstmLayer.cpp +++ b/src/armnn/layers/QLstmLayer.cpp @@ -503,4 +503,130 @@ void QLstmLayer::Accept(ILayerVisitor& visitor) const visitor.VisitQLstmLayer(this, GetParameters(), inputParams, GetName()); } + +void QLstmLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<ConstTensor> constTensors; + + // First add mandatory/basic parameters + if (m_BasicParameters.m_InputToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToForgetWeights->GetTensorInfo(), + m_BasicParameters.m_InputToForgetWeights->Map(true))); + } + if (m_BasicParameters.m_InputToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToCellWeights->GetTensorInfo(), + m_BasicParameters.m_InputToCellWeights->Map(true))); + } + if (m_BasicParameters.m_InputToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_InputToOutputWeights->GetTensorInfo(), + m_BasicParameters.m_InputToOutputWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToForgetWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToForgetWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToCellWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToCellWeights->Map(true))); + } + if (m_BasicParameters.m_RecurrentToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_BasicParameters.m_RecurrentToOutputWeights->GetTensorInfo(), + m_BasicParameters.m_RecurrentToOutputWeights->Map(true))); + } + if (m_BasicParameters.m_ForgetGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_ForgetGateBias->GetTensorInfo(), + m_BasicParameters.m_ForgetGateBias->Map(true))); + } + if (m_BasicParameters.m_CellBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_CellBias->GetTensorInfo(), + m_BasicParameters.m_CellBias->Map(true))); + } + if (m_BasicParameters.m_OutputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_BasicParameters.m_OutputGateBias->GetTensorInfo(), + m_BasicParameters.m_OutputGateBias->Map(true))); + } + + // Add cifg 
parameters + if (m_CifgParameters.m_InputToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_CifgParameters.m_InputToInputWeights->GetTensorInfo(), + m_CifgParameters.m_InputToInputWeights->Map(true))); + } + if (m_CifgParameters.m_RecurrentToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_CifgParameters.m_RecurrentToInputWeights->GetTensorInfo(), + m_CifgParameters.m_RecurrentToInputWeights->Map(true))); + } + if (m_CifgParameters.m_InputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_CifgParameters.m_InputGateBias->GetTensorInfo(), + m_CifgParameters.m_InputGateBias->Map(true))); + } + + // Add peephole parameters + if (m_PeepholeParameters.m_CellToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToInputWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToInputWeights->Map(true))); + } + if (m_PeepholeParameters.m_CellToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToForgetWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToForgetWeights->Map(true))); + } + if (m_PeepholeParameters.m_CellToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_PeepholeParameters.m_CellToOutputWeights->GetTensorInfo(), + m_PeepholeParameters.m_CellToOutputWeights->Map(true))); + } + + // Add projection parameters + if (m_ProjectionParameters.m_ProjectionWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_ProjectionParameters.m_ProjectionWeights->GetTensorInfo(), + m_ProjectionParameters.m_ProjectionWeights->Map(true))); + } + if (m_ProjectionParameters.m_ProjectionBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_ProjectionParameters.m_ProjectionBias->GetTensorInfo(), + m_ProjectionParameters.m_ProjectionBias->Map(true))); + } + + // Add norm parameters + if (m_LayerNormParameters.m_InputLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_InputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_InputLayerNormWeights->Map(true))); + } + if (m_LayerNormParameters.m_ForgetLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_ForgetLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_ForgetLayerNormWeights->Map(true))); + } + if (m_LayerNormParameters.m_CellLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_CellLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_CellLayerNormWeights->Map(true))); + } + if (m_LayerNormParameters.m_OutputLayerNormWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_LayerNormParameters.m_OutputLayerNormWeights->GetTensorInfo(), + m_LayerNormParameters.m_OutputLayerNormWeights->Map(true))); + } + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/QLstmLayer.hpp b/src/armnn/layers/QLstmLayer.hpp index 5757ef6559..70cc4f2b15 100644 --- a/src/armnn/layers/QLstmLayer.hpp +++ b/src/armnn/layers/QLstmLayer.hpp @@ -109,6 +109,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a QLstmLayer. /// @param [in] name Optional name for the layer. 
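Unlike LstmLayer::ExecuteStrategy, the QLstmLayer version above does not gate the optional groups on the descriptor flags; it appends whichever handles happen to be non-null. A consumer therefore cannot assume a fixed constants layout across configurations and should cross-check against the QLstmDescriptor, along these lines (a sketch only: MaxQLstmConstants is a hypothetical name, the group sizes mirror the emplace order above, and the result is an upper bound because the projection bias may be absent).

    #include <armnn/Descriptors.hpp>
    #include <cstddef>

    // Sketch: upper bound on the constants a QLstm layer emits for a given config.
    std::size_t MaxQLstmConstants(const armnn::QLstmDescriptor& desc)
    {
        std::size_t count = 9;                                    // mandatory weights and biases
        if (!desc.m_CifgEnabled)      { count += 3; }             // input-gate weights/bias
        if (desc.m_PeepholeEnabled)   { count += desc.m_CifgEnabled ? 2 : 3; }
        if (desc.m_ProjectionEnabled) { count += 2; }             // weights + optional bias
        if (desc.m_LayerNormEnabled)  { count += desc.m_CifgEnabled ? 3 : 4; }
        return count;
    }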
diff --git a/src/armnn/layers/QuantizedLstmLayer.cpp b/src/armnn/layers/QuantizedLstmLayer.cpp index 624e443064..578d9eb137 100644 --- a/src/armnn/layers/QuantizedLstmLayer.cpp +++ b/src/armnn/layers/QuantizedLstmLayer.cpp @@ -291,4 +291,91 @@ void QuantizedLstmLayer::Accept(ILayerVisitor& visitor) const visitor.VisitQuantizedLstmLayer(this, inputParams, GetName()); } +void QuantizedLstmLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<ConstTensor> constTensors; + + // InputToX weight tensors + if (m_QuantizedLstmParameters.m_InputToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_InputToInputWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_InputToInputWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_InputToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_InputToForgetWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_InputToForgetWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_InputToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_InputToCellWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_InputToCellWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_InputToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_InputToOutputWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_InputToOutputWeights->Map(true))); + } + + // RecurrentToX weight tensors + if (m_QuantizedLstmParameters.m_RecurrentToInputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_QuantizedLstmParameters.m_RecurrentToInputWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_RecurrentToInputWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_RecurrentToForgetWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_QuantizedLstmParameters.m_RecurrentToForgetWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_RecurrentToForgetWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_RecurrentToCellWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_QuantizedLstmParameters.m_RecurrentToCellWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_RecurrentToCellWeights->Map(true))); + } + + if (m_QuantizedLstmParameters.m_RecurrentToOutputWeights != nullptr) + { + constTensors.emplace_back(ConstTensor( + m_QuantizedLstmParameters.m_RecurrentToOutputWeights->GetTensorInfo(), + m_QuantizedLstmParameters.m_RecurrentToOutputWeights->Map(true))); + } + + // Bias tensors + if (m_QuantizedLstmParameters.m_InputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_InputGateBias->GetTensorInfo(), + m_QuantizedLstmParameters.m_InputGateBias->Map(true))); + } + + if (m_QuantizedLstmParameters.m_ForgetGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_ForgetGateBias->GetTensorInfo(), + m_QuantizedLstmParameters.m_ForgetGateBias->Map(true))); + } + + if (m_QuantizedLstmParameters.m_CellBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_CellBias->GetTensorInfo(), + m_QuantizedLstmParameters.m_CellBias->Map(true))); + } + + if (m_QuantizedLstmParameters.m_OutputGateBias != nullptr) + { + constTensors.emplace_back(ConstTensor(m_QuantizedLstmParameters.m_OutputGateBias->GetTensorInfo(), + m_QuantizedLstmParameters.m_OutputGateBias->Map(true))); + } + + + strategy.ExecuteStrategy(this, 
BaseDescriptor(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/QuantizedLstmLayer.hpp b/src/armnn/layers/QuantizedLstmLayer.hpp index bfe86a4629..544acbd816 100644 --- a/src/armnn/layers/QuantizedLstmLayer.hpp +++ b/src/armnn/layers/QuantizedLstmLayer.hpp @@ -71,6 +71,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a QuantizedLstmLayer. /// @param [in] name Optional name for the layer. diff --git a/src/armnn/layers/RankLayer.cpp b/src/armnn/layers/RankLayer.cpp index 2b0dffe370..3b14ef0d93 100644 --- a/src/armnn/layers/RankLayer.cpp +++ b/src/armnn/layers/RankLayer.cpp @@ -46,4 +46,9 @@ void RankLayer::Accept(ILayerVisitor& visitor) const visitor.VisitRankLayer(this, GetName()); } +void RankLayer::ExecuteStrategy(IStrategy& strategy) const +{ + strategy.ExecuteStrategy(this, BaseDescriptor(), {}, GetName()); +} + } //namespace armnn
\ No newline at end of file diff --git a/src/armnn/layers/RankLayer.hpp b/src/armnn/layers/RankLayer.hpp index f4f1ec9e66..fbd2824bb5 100644 --- a/src/armnn/layers/RankLayer.hpp +++ b/src/armnn/layers/RankLayer.hpp @@ -24,7 +24,9 @@ class RankLayer : public Layer void Accept(ILayerVisitor& visitor) const override; - protected: + void ExecuteStrategy(IStrategy& strategy) const override; + +protected: RankLayer(const char* name); ~RankLayer() = default; }; diff --git a/src/armnn/layers/TransposeConvolution2dLayer.cpp b/src/armnn/layers/TransposeConvolution2dLayer.cpp index 189e5f6168..bd8cb096e2 100644 --- a/src/armnn/layers/TransposeConvolution2dLayer.cpp +++ b/src/armnn/layers/TransposeConvolution2dLayer.cpp @@ -135,4 +135,16 @@ void TransposeConvolution2dLayer::Accept(ILayerVisitor& visitor) const visitor.VisitTransposeConvolution2dLayer(this, GetParameters(), weightsTensor, optionalBiasTensor, GetName()); } +void TransposeConvolution2dLayer::ExecuteStrategy(IStrategy& strategy) const +{ + std::vector<armnn::ConstTensor> constTensors { {m_Weight->GetTensorInfo(), m_Weight->Map(true)} }; + + if (GetParameters().m_BiasEnabled) + { + constTensors.emplace_back(ConstTensor(m_Bias->GetTensorInfo(), m_Bias->Map(true))); + } + + strategy.ExecuteStrategy(this, GetParameters(), constTensors, GetName()); +} + } // namespace armnn diff --git a/src/armnn/layers/TransposeConvolution2dLayer.hpp b/src/armnn/layers/TransposeConvolution2dLayer.hpp index 1ee984d231..903c957393 100644 --- a/src/armnn/layers/TransposeConvolution2dLayer.hpp +++ b/src/armnn/layers/TransposeConvolution2dLayer.hpp @@ -42,6 +42,8 @@ public: void Accept(ILayerVisitor& visitor) const override; + void ExecuteStrategy(IStrategy& strategy) const override; + protected: /// Constructor to create a TransposeConvolution2dLayer. /// @param [in] param TransposeConvolution2dDescriptor to configure the 2D transpose convolution operation. 
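TransposeConvolution2dLayer follows the same convention as FullyConnectedLayer earlier in this patch: weights are emitted first and, when m_BiasEnabled is set, the bias second. A consuming strategy can therefore branch on constants.size(), which is exactly what the reworked QuantizerTest below does; a distilled form of that pattern is sketched here, with OnWeightedLayer and ValidateWeights as hypothetical names.

    #include <armnn/Optional.hpp>
    #include <armnn/Tensor.hpp>
    #include <armnn/utility/IgnoreUnused.hpp>
    #include <vector>

    void ValidateWeights(const armnn::ConstTensor& weights,
                         const armnn::Optional<armnn::ConstTensor>& biases)
    {
        // e.g. check that the bias scale equals input scale * weights scale here.
        armnn::IgnoreUnused(weights, biases);
    }

    // Sketch: weights-first, optional-bias-second convention used by this patch.
    void OnWeightedLayer(const std::vector<armnn::ConstTensor>& constants)
    {
        if (constants.size() == 2)
        {
            ValidateWeights(constants[0], constants[1]);
        }
        else if (constants.size() == 1)
        {
            ValidateWeights(constants[0], armnn::EmptyOptional());
        }
    }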
diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp index da85029373..67d0f95292 100644 --- a/src/armnn/test/QuantizerTest.cpp +++ b/src/armnn/test/QuantizerTest.cpp @@ -7,10 +7,8 @@ #include "../Network.hpp" #include "../NetworkQuantizerUtils.hpp" #include "../OverrideInputRangeVisitor.hpp" -#include "../RangeTracker.hpp" #include <armnn/INetwork.hpp> -#include <armnn/LayerVisitorBase.hpp> #include <armnn/Tensor.hpp> #include <armnn/Types.hpp> #include <armnn/utility/IgnoreUnused.hpp> @@ -37,45 +35,332 @@ const float g_TestTolerance = 0.000001f; BOOST_AUTO_TEST_SUITE(Quantizer) -class TestQuantization : public LayerVisitorBase<VisitorThrowingPolicy> +class TestQuantization : public IStrategy { public: - TestQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : LayerVisitorBase<VisitorThrowingPolicy>() - , m_InputShape(inputShape) - , m_OutputShape(outputShape) - , m_QuantizerOptions(QuantizerOptions()) {} + TestQuantization(const TensorShape &inputShape, const TensorShape &outputShape) + : m_InputShape(inputShape), m_OutputShape(outputShape), m_QuantizerOptions(QuantizerOptions()) + {} TestQuantization(const QuantizerOptions& options, const TensorShape& inputShape, const TensorShape& outputShape) - : LayerVisitorBase<VisitorThrowingPolicy>() - , m_InputShape(inputShape) + : m_InputShape(inputShape) , m_OutputShape(outputShape) , m_QuantizerOptions(options) {} - void VisitInputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override + void ExecuteStrategy(const armnn::IConnectableLayer *layer, + const BaseDescriptor &descriptor, + const std::vector<armnn::ConstTensor> &constants, + const char *name, + const armnn::LayerBindingId id) override { IgnoreUnused(id, name); + + if (layer->GetType() == armnn::LayerType::Output) + { + const TensorInfo &info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + BOOST_TEST(m_OutputShape == info.GetShape()); + return; + } + const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); - BOOST_TEST(m_InputShape == info.GetShape()); - // Based off current default [-15.0f, 15.0f] - TestQuantizationParams(info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase , 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); + + switch (layer->GetType()) + { + case armnn::LayerType::BatchToSpaceNd : + case armnn::LayerType::Permute : + case armnn::LayerType::Pooling2d : + case armnn::LayerType::Reshape : + case armnn::LayerType::Resize : + case armnn::LayerType::SpaceToBatchNd : + case armnn::LayerType::Splitter : + case armnn::LayerType::StridedSlice : + { + CheckDefaultQuantizationSettings(info); + break; + } + case armnn::LayerType::Addition : + { + + // Based off default static range [-20.0f, 20.0f] + TestQuantizationParams(info, {40.0f / g_AsymmU8QuantizationBase, 128}, + {40.0f / g_AsymmS8QuantizationBase, 0}, + {20.0f / g_SymmS8QuantizationBase, 0}, + {20.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case armnn::LayerType::Activation : + { + const ActivationDescriptor& activationDescriptor = static_cast<const ActivationDescriptor&>(descriptor); + + switch (activationDescriptor.m_Function) + { + case ActivationFunction::BoundedReLu : + { + // Based off default static range [0.0f, 3.5f] + TestQuantizationParams(info, {3.5f / g_AsymmU8QuantizationBase, 0}, + {3.5f / g_AsymmS8QuantizationBase, -128}, + {3.5f / g_SymmS8QuantizationBase, 0}, + {3.5f / g_SymmS16QuantizationBase, 0}); + break; 
+ } + case ActivationFunction::Elu : + { + TestQuantizationParams( + info, {30.0f / g_AsymmU8QuantizationBase, 128}, + {30.0f / g_AsymmS8QuantizationBase, 0}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case ActivationFunction::HardSwish : + { + TestQuantizationParams(info, {30.0f / g_AsymmU8QuantizationBase, 128}, + {30.0f / g_AsymmS8QuantizationBase, 0}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case ActivationFunction::LeakyReLu : + { + // Based off default static range [-5.0f, 15.0f] + TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, + {20.0f / g_AsymmS8QuantizationBase,-64}, + {15.0f / g_SymmS8QuantizationBase , 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case ActivationFunction::TanH : + { + TestQuantizationParams(info, {2.0f / g_AsymmU8QuantizationBase, 128}, + {2.0f / g_AsymmS8QuantizationBase, 0}, + {1.0f / g_SymmS8QuantizationBase , 0}, + {1.0f / g_SymmS16QuantizationBase, 0}); + break; + } + default: + { + // Based off default static range [0.0f, 15.0f] + TestQuantizationParams(info, {15.0f / g_AsymmU8QuantizationBase, 0}, + {15.0f / g_AsymmS8QuantizationBase, -128}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + break; + } + } + break; + } + case armnn::LayerType::ArgMinMax : + { + const ArgMinMaxDescriptor& argMinMaxDescriptor = static_cast<const ArgMinMaxDescriptor&>(descriptor); + + if(argMinMaxDescriptor.m_Function == ArgMinMaxFunction::Max) + { + break; + } + TestQuantizationParams(info, + { 30.0f / g_AsymmU8QuantizationBase, 128 }, + { 30.0f / g_AsymmS8QuantizationBase, 0}, + { 15.0f / g_SymmS8QuantizationBase, 0}, + { 15.0f / g_SymmS16QuantizationBase, 0 }); + break; + } + case armnn::LayerType::BatchNormalization : + { + + // Based off default static range [-15.0f, 15.0f] + TestQuantizationParams( + info, {30.0f / g_AsymmU8QuantizationBase, 128}, + {30.0f / g_AsymmS8QuantizationBase, 0}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + + // Test constants + TestConstantQuantizationParams(constants[0].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); + TestConstantQuantizationParams(constants[1].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); + TestConstantQuantizationParams(constants[2].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); + TestConstantQuantizationParams(constants[3].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); + break; + } + case armnn::LayerType::Comparison : + { + + const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; + const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + + break; + } + case armnn::LayerType::Constant : + { + + // Based off the range of values in the const tensor used for the test: [-2.0f, 6.0f] + TestQuantizationParams(info, {8.0f / g_AsymmU8QuantizationBase, 64}, + {8.0f / g_AsymmS8QuantizationBase, -64}, + {6.0f / g_SymmS8QuantizationBase, 0}, + {6.0f / g_SymmS16QuantizationBase, 0}); + + break; + } + case armnn::LayerType::Convolution2d : + { + if (constants.size() == 1) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional()); + } + else if (constants.size() == 
2) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]); + } + break; + } + case armnn::LayerType::DepthwiseConvolution2d : + { + if (constants.size() == 2) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]); + } + else if (constants.size() == 1) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional()); + } + break; + } + case armnn::LayerType::DepthToSpace : + { + const OffsetScalePair qAsymmU8Params{30.0f / g_AsymmU8QuantizationBase, 128}; + const OffsetScalePair qAsymmS8Params{30.0f / g_AsymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS8Params{15.0f / g_SymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS16Params{15.0f / g_SymmS16QuantizationBase, 0}; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + break; + } + case armnn::LayerType::FullyConnected : + { + if (constants.size() == 2) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]); + } + else if (constants.size() == 1) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional()); + } + + break; + } + case armnn::LayerType::Fill : + { + const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; + const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + break; + } + case armnn::LayerType::Input : + { + BOOST_TEST(m_InputShape == info.GetShape()); + // Based off current default [-15.0f, 15.0f] + TestQuantizationParams(info, {30.0f / g_AsymmU8QuantizationBase, 128}, + {30.0f / g_AsymmS8QuantizationBase, 0}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case armnn::LayerType::InstanceNormalization : + { + const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; + const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + break; + } + case armnn::LayerType::LogSoftmax : + { + const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; + const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; + const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + break; + } + case armnn::LayerType::Slice : + { + const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; + const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 }; + const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 }; + const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; + + TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); + break; + } + case armnn::LayerType::Softmax : + { + // Based off default static range [0.0f, 1.0f] + TestQuantizationParams(info, {1.0f / g_AsymmU8QuantizationBase, 0}, + 
{1.0f / g_AsymmS8QuantizationBase, -128}, + {1.0f / g_SymmS8QuantizationBase, 0}, + {1.0f / g_SymmS16QuantizationBase, 0}); + break; + } + case armnn::LayerType::SpaceToDepth : + { + TestQuantizationParams(info, + { 30.0f / g_AsymmU8QuantizationBase, 128 }, + { 30.0f / g_AsymmS8QuantizationBase, 0 }, + { 15.0f / g_SymmS8QuantizationBase, 0 }, + { 15.0f / g_SymmS16QuantizationBase, 0 }); + + break; + } + case armnn::LayerType::Stack : + { + TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); + + TestQuantizationParams(outputInfo, + { 30.0f / g_AsymmU8QuantizationBase, 128 }, + { 30.0f / g_AsymmS8QuantizationBase, 0}, + { 15.0f / g_SymmS8QuantizationBase, 0}, + { 15.0f / g_SymmS16QuantizationBase, 0 }); + break; + } + case armnn::LayerType::TransposeConvolution2d : + { + if (constants.size() == 2) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]); + } + else if (constants.size() == 1) + { + TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional()); + } + break; + } + default: + { + throw UnimplementedException("Unimplemented layer encountered"); + } + } } - void VisitOutputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override + +protected: + + void CheckDefaultQuantizationSettings(const TensorInfo& info) { - IgnoreUnused(id, name); - const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - BOOST_TEST(m_OutputShape == info.GetShape()); + TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, + {20.0f / g_AsymmS8QuantizationBase,-64}, + {15.0f / g_SymmS8QuantizationBase, 0}, + {15.0f / g_SymmS16QuantizationBase, 0}); } -protected: void TestQuantizationParams(const TensorInfo& info, const OffsetScalePair& qAsymmU8Params, const OffsetScalePair& qAsymmS8Params, @@ -188,39 +473,41 @@ private: QuantizerOptions m_QuantizerOptions; }; -void VisitLayersTopologically(const INetwork* inputNetwork, ILayerVisitor& visitor) +void VisitLayersTopologically(const INetwork* inputNetwork, IStrategy& strategy) { auto network = PolymorphicDowncast<const Network*>(inputNetwork); auto graph = network->GetGraph().TopologicalSort(); - VisitLayers(graph, visitor); + ApplyStrategyToLayers(graph, strategy); } -class TestAdditionQuantization : public TestQuantization +void TestNetwork(INetwork* network, const TensorShape inShape, const TensorShape outShape) { -public: - TestAdditionQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} + const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8); + INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network, qAsymmU8Options)->ExportNetwork(); + TestQuantization validatorQAsymmU8(inShape, outShape); + VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - TestAdditionQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} + const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); + INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network, qAsymmS8Options)->ExportNetwork(); + TestQuantization validatorQAsymmS8(qAsymmS8Options, inShape, outShape); + VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - void VisitAdditionLayer(const IConnectableLayer* layer, - const char* name = nullptr) override - { - IgnoreUnused(name); - TensorInfo info = 
layer->GetOutputSlot(0).GetTensorInfo(); + const QuantizerOptions qSymmS8Options(DataType::QSymmS8); + INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network, qSymmS8Options)->ExportNetwork(); + TestQuantization validatorQSymmS8(qSymmS8Options, inShape, outShape); + VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - // Based off default static range [-20.0f, 20.0f] - TestQuantizationParams(info, {40.0f / g_AsymmU8QuantizationBase, 128}, - {40.0f / g_AsymmS8QuantizationBase, 0}, - {20.0f / g_SymmS8QuantizationBase, 0}, - {20.0f / g_SymmS16QuantizationBase, 0}); - } -}; + const QuantizerOptions qSymmS16options(DataType::QSymmS16); + INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network, qSymmS16options)->ExportNetwork(); + TestQuantization validatorQSymmS16(qSymmS16options, inShape, outShape); + VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); +} +void TestNetwork(INetwork* network, const TensorShape shape) +{ + TestNetwork(network, shape, shape); +} BOOST_AUTO_TEST_CASE(QuantizeAddition) { @@ -244,54 +531,9 @@ BOOST_AUTO_TEST_CASE(QuantizeAddition) input1->GetOutputSlot(0).SetTensorInfo(info); addition->GetOutputSlot(0).SetTensorInfo(info); - const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get(), qAsymmU8Options)->ExportNetwork(); - TestAdditionQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestAdditionQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestAdditionQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestAdditionQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } -class TestActivationQuantization : public TestQuantization -{ -public: - TestActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [0.0f, 15.0f] - TestQuantizationParams(info, {15.0f / g_AsymmU8QuantizationBase, 0}, - {15.0f / g_AsymmS8QuantizationBase, -128}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } -}; - INetworkPtr 
CreateNetworkWithActivationLayer(const ActivationDescriptor& descriptor, const TensorShape& shape) { INetworkPtr network = INetwork::Create(); @@ -313,28 +555,6 @@ INetworkPtr CreateNetworkWithActivationLayer(const ActivationDescrip return network; } -class TestArgMinMaxQuantization : public TestQuantization -{ -public: - TestArgMinMaxQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestArgMinMaxQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor&, - const char* name = nullptr) override - { - IgnoreUnused(name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - BOOST_CHECK(info.GetDataType() == DataType::Signed32); - } -}; - INetworkPtr CreateNetworkWithArgMinMaxLayer(const ArgMinMaxDescriptor& descriptor, const TensorShape& shape) { INetworkPtr network = INetwork::Create(); @@ -417,34 +637,47 @@ BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant) std::unique_ptr<IQuantizationScheme> quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>(); OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0); - class TestOutputLayerVisitor : public LayerVisitorBase<VisitorNoThrowPolicy> - { - public: - TestOutputLayerVisitor(const OffsetScalePair& offsetScalePair, const DataType& dataType) : +class TestOutputStrategy : public IStrategy +{ + public : + TestOutputStrategy(const OffsetScalePair& offsetScalePair, const DataType& dataType) : m_OffsetScalePair(offsetScalePair), m_DataType(dataType) {} - void VisitOutputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id) override + { + IgnoreUnused(name, constants, id, descriptor); + + switch (layer->GetType()) { - IgnoreUnused(id, name); - const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType, - std::string(armnn::GetDataTypeName(info.GetDataType())) - .append(" == ").append(armnn::GetDataTypeName(m_DataType))); - // int_32t - BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second); - // float - BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, boost::test_tools::tolerance(0.001)); + case armnn::LayerType::Output : + { + const TensorInfo &info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType, + std::string(armnn::GetDataTypeName(info.GetDataType())) + .append(" == ").append(armnn::GetDataTypeName(m_DataType))); + // int32_t + BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second); + // float + BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, + boost::test_tools::tolerance(0.001)); + break; + } + default: + {} } + - private: - const OffsetScalePair m_OffsetScalePair; - const DataType m_DataType; - }; +private: + const OffsetScalePair m_OffsetScalePair; + const DataType m_DataType; +}; - TestOutputLayerVisitor visitor(qParams, quantizationScheme->GetDataType()); - quantizedNetwork->Accept(visitor); + TestOutputStrategy strategy(qParams, 
quantizationScheme->GetDataType()); + quantizedNetwork->ExecuteStrategy(strategy); } BOOST_AUTO_TEST_CASE(QuantizeAbsActivation) @@ -457,25 +690,7 @@ BOOST_AUTO_TEST_CASE(QuantizeAbsActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get(), qAsymmU8Options)->ExportNetwork(); - TestActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeArgMax) @@ -486,25 +701,7 @@ BOOST_AUTO_TEST_CASE(QuantizeArgMax) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithArgMinMaxLayer(descriptor, shape); - const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get(), qAsymmU8Options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeLinearActivation) @@ -517,24 +714,8 @@ BOOST_AUTO_TEST_CASE(QuantizeLinearActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - 
TestActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeReLuActivation) @@ -547,24 +728,7 @@ BOOST_AUTO_TEST_CASE(QuantizeReLuActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeSoftReLuActivation) @@ -577,54 +741,11 @@ BOOST_AUTO_TEST_CASE(QuantizeSoftReLuActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); 
- INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation) { - class TestBoundedReluActivationQuantization : public TestQuantization - { - public: - TestBoundedReluActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestBoundedReluActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [0.0f, 3.5f] - TestQuantizationParams(info, {3.5f / g_AsymmU8QuantizationBase, 0}, - {3.5f / g_AsymmS8QuantizationBase, -128}, - {3.5f / g_SymmS8QuantizationBase, 0}, - {3.5f / g_SymmS16QuantizationBase, 0}); - } - }; - ActivationDescriptor descriptor; descriptor.m_Function = ActivationFunction::BoundedReLu; descriptor.m_A = 3.5f; @@ -633,55 +754,11 @@ BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestBoundedReluActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestBoundedReluActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestBoundedReluActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestBoundedReluActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeTanHActivation) { - class TestTanHActivationQuantization : public TestQuantization - { - public: - TestTanHActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - 
TestTanHActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [-1.0f, 1.0f] - TestQuantizationParams( - info, {2.0f / g_AsymmU8QuantizationBase, 128}, - {2.0f / g_AsymmS8QuantizationBase, 0}, - {1.0f / g_SymmS8QuantizationBase , 0}, - {1.0f / g_SymmS16QuantizationBase, 0}); - } - }; - ActivationDescriptor descriptor; descriptor.m_Function = ActivationFunction::TanH; descriptor.m_A = 3.5f; @@ -690,64 +767,9 @@ BOOST_AUTO_TEST_CASE(QuantizeTanHActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestTanHActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestTanHActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestTanHActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestTanHActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } -class TestLeakyReLuActivationQuantization : public TestQuantization -{ -public: - TestLeakyReLuActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestLeakyReLuActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [-5.0f, 15.0f] - TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, - {20.0f / g_AsymmS8QuantizationBase,-64}, - {15.0f / g_SymmS8QuantizationBase , 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } - -protected: - // Used by the descendant classes which test layers - // that are forwarding their parent layer settings - void CheckForwardedQuantizationSettings(const IConnectableLayer* layer) - { - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, - {20.0f / 
g_AsymmS8QuantizationBase,-64}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } -}; - BOOST_AUTO_TEST_CASE(QuantizeLeakyReLuActivation) { ActivationDescriptor descriptor; @@ -758,176 +780,34 @@ BOOST_AUTO_TEST_CASE(QuantizeLeakyReLuActivation) const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestLeakyReLuActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestLeakyReLuActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestLeakyReLuActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestLeakyReLuActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeELuActivation) { - class TestEluActivationQuantization : public TestQuantization - { - public: - TestEluActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestEluActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [-15.0f, 15.0f] - TestQuantizationParams( - info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } - }; - ActivationDescriptor descriptor; descriptor.m_Function = ActivationFunction::Elu; const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestEluActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestEluActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 
= INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestEluActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestEluActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeHardSwishActivation) { - class TestHardSwishActivationQuantization : public TestQuantization - { - public: - TestHardSwishActivationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestHardSwishActivationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitActivationLayer(const IConnectableLayer* layer, - const ActivationDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [-15.0f, 15.0f] - TestQuantizationParams( - info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } - }; - ActivationDescriptor descriptor; descriptor.m_Function = ActivationFunction::HardSwish; const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestHardSwishActivationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestHardSwishActivationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestHardSwishActivationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestHardSwishActivationQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeBatchNorm) { - class TestBatchNormalizationQuantization : public TestQuantization - { - public: - TestBatchNormalizationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestBatchNormalizationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : 
TestQuantization(options, inputShape, outputShape) {} - - void VisitBatchNormalizationLayer(const IConnectableLayer* layer, - const BatchNormalizationDescriptor& desc, - const ConstTensor& mean, - const ConstTensor& variance, - const ConstTensor& beta, - const ConstTensor& gamma, - const char* name = nullptr) override - { - IgnoreUnused(desc, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [-15.0f, 15.0f] - TestQuantizationParams( - info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - - // Test constants - TestConstantQuantizationParams(mean.GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - TestConstantQuantizationParams(variance.GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - TestConstantQuantizationParams(beta.GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - TestConstantQuantizationParams(gamma.GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{3U}; @@ -958,55 +838,11 @@ BOOST_AUTO_TEST_CASE(QuantizeBatchNorm) input0->GetOutputSlot(0).SetTensorInfo(info); batchNorm->GetOutputSlot(0).SetTensorInfo(info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestBatchNormalizationQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestBatchNormalizationQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestBatchNormalizationQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions QQsymm16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), QQsymm16Options)->ExportNetwork(); - TestBatchNormalizationQuantization validatorQSymmS16(QQsymm16Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace) { - class TestDepthToSpaceQuantization : public TestQuantization - { - public: - TestDepthToSpaceQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestDepthToSpaceQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - virtual void VisitDepthToSpaceLayer(const IConnectableLayer* layer, - const DepthToSpaceDescriptor& desc, - const char* name = nullptr) - { - IgnoreUnused(desc, name); - const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); - - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 }; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 }; 
- const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - } - }; - const TensorShape inputShape { 1, 2, 2, 4 }; const TensorShape outputShape{ 1, 4, 4, 1 }; @@ -1026,28 +862,7 @@ BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace) inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); depthToSpaceLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - // test QAsymmU8 quantization - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestDepthToSpaceQuantization validatorQAsymmU8(inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - // test QAsymmS8 quantization - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestDepthToSpaceQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestDepthToSpaceQuantization validatorQSymmS8(qSymmS8Options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - // test QSymmS16 quantization - const QuantizerOptions Qsymm16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork(); - TestDepthToSpaceQuantization validatorQSymmS16(Qsymm16Options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), inputShape, outputShape); } BOOST_AUTO_TEST_CASE(OverrideInputRangeEmptyNetwork) @@ -1058,8 +873,8 @@ BOOST_AUTO_TEST_CASE(OverrideInputRangeEmptyNetwork) Network network; // Empty network auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers - OverrideInputRangeVisitor overrideInputRangeVisitor(ranges, 0, minMaxRange); - VisitLayers(inputLayers, overrideInputRangeVisitor); + OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange); + ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy); BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched } @@ -1073,8 +888,8 @@ BOOST_AUTO_TEST_CASE(OverrideInputRangeNoInputLayers) network.AddAdditionLayer(); // Network with no input layers auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers - OverrideInputRangeVisitor overrideInputRangeVisitor(ranges, 0, minMaxRange); - VisitLayers(inputLayers, overrideInputRangeVisitor); + OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange); + ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy); BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched } @@ -1107,15 +922,15 @@ BOOST_AUTO_TEST_CASE(OverrideInputRangeInputLayers) auto inputLayers = network.GetGraph().GetInputLayers(); // List of input layers // Trying to override the input range for the input layer with binding id 3 (does not exist in the network) - OverrideInputRangeVisitor overrideInputRangeVisitorLayer3(ranges, 3, minMaxRange); - VisitLayers(inputLayers, overrideInputRangeVisitorLayer3); + 
OverrideInputRangeStrategy overrideInputRangeStrategy3(ranges, 3, minMaxRange); + ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy3); // Check that the map of ranges remained untouched BOOST_CHECK(ranges.IsEmpty()); // Override the input range for the input layer with binding id 1 - OverrideInputRangeVisitor overrideInputRangeVisitorLayer1(ranges, 1, minMaxRange); - VisitLayers(inputLayers, overrideInputRangeVisitorLayer1); + OverrideInputRangeStrategy overrideInputRangeStrategy1(ranges, 1, minMaxRange); + ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy1); // Check that the map of ranges has been populated BOOST_CHECK(!ranges.IsEmpty()); @@ -1170,80 +985,14 @@ INetworkPtr CreateNetworkWithFullyConnectedLayer(const bool biasEnabled, void ValidateFullyConnectedLayer(const bool biasEnabled) { - class TestFullyConnectedQuantization : public TestQuantization - { - public: - TestFullyConnectedQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestFullyConnectedQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitFullyConnectedLayer(const IConnectableLayer* layer, - const FullyConnectedDescriptor& desc, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char* name = nullptr) override - { - IgnoreUnused(desc, name); - TestQuantizationOnLayersWithBiases(layer, weights, biases); - } - }; - const TensorShape shape{3U}; INetworkPtr network = CreateNetworkWithFullyConnectedLayer(biasEnabled, shape, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestFullyConnectedQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestFullyConnectedQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestFullyConnectedQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions Qsymm16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork(); - TestFullyConnectedQuantization validatorQSymmS16(Qsymm16Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeFill) { - class TestFillQuantization : public TestQuantization - { - public: - TestFillQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestFillQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - virtual void VisitFillLayer(const IConnectableLayer* layer, - const FillDescriptor& desc, - const char* name = nullptr) - { - IgnoreUnused(desc, 
name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - } - }; - const TensorShape tensorShape{ 1U }; const TensorInfo tensorInfo(tensorShape, DataType::Float32); @@ -1262,28 +1011,7 @@ BOOST_AUTO_TEST_CASE(QuantizeFill) inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); fillLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - // test QAsymmU8 quantization - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestFillQuantization validatorQAsymmU8(tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - // test QAsymmS8 quantization - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestFillQuantization validatorQAsymmS8(qAsymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestFillQuantization validatorQSymmS8(qSymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - // test QuantisedSymmS16 quantization - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestFillQuantization validatorQSymmS16(qSymmS16options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), tensorShape); } BOOST_AUTO_TEST_CASE(QuantizeFullyConnected) @@ -1298,28 +1026,6 @@ BOOST_AUTO_TEST_CASE(QuantizeFullyConnectedBiasEnabled) void TestQuantizeConvolution2d(bool useBiases) { - class TestConv2dQuantization : public TestQuantization - { - public: - TestConv2dQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestConv2dQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitConvolution2dLayer(const IConnectableLayer *layer, - const Convolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name = nullptr) override - { - IgnoreUnused(convolution2dDescriptor, name); - TestQuantizationOnLayersWithBiases(layer, weights, biases); - } - }; - INetworkPtr network = INetwork::Create(); TensorShape shape{3U}; @@ -1352,24 +1058,7 @@ void TestQuantizeConvolution2d(bool useBiases) input0->GetOutputSlot(0).SetTensorInfo(info); conv2d->GetOutputSlot(0).SetTensorInfo(info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestConv2dQuantization validatorQAsymmU8(shape, shape); - 
VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestConv2dQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestConv2dQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions Qsymm16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork(); - TestConv2dQuantization validatorQSymmS16(Qsymm16Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeConvolution2d) @@ -1384,28 +1073,6 @@ BOOST_AUTO_TEST_CASE(QuantizeConvolution2dWithBiases) void TestQuantizeDepthwiseConvolution2d(bool useBiases) { - class TestDepthwiseConv2dQuantization : public TestQuantization - { - public: - TestDepthwiseConv2dQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestDepthwiseConv2dQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitDepthwiseConvolution2dLayer(const IConnectableLayer *layer, - const DepthwiseConvolution2dDescriptor& convolution2dDescriptor, - const ConstTensor& weights, - const Optional<ConstTensor>& biases, - const char *name = nullptr) override - { - IgnoreUnused(convolution2dDescriptor, name); - TestQuantizationOnLayersWithBiases(layer, weights, biases); - } - }; - INetworkPtr network = INetwork::Create(); TensorShape shape{3U}; @@ -1438,24 +1105,7 @@ void TestQuantizeDepthwiseConvolution2d(bool useBiases) input0->GetOutputSlot(0).SetTensorInfo(info); depthwiseConv2d->GetOutputSlot(0).SetTensorInfo(info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestDepthwiseConv2dQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestDepthwiseConv2dQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestDepthwiseConv2dQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions Qsymm16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), Qsymm16Options)->ExportNetwork(); - TestDepthwiseConv2dQuantization validatorQSymmS16(Qsymm16Options, shape, shape); - 
VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2d) @@ -1470,35 +1120,8 @@ BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2dWithBiases) BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization) { - class TestInstanceNormalizationQuantization : public TestQuantization - { - public: - TestInstanceNormalizationQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestInstanceNormalizationQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - virtual void VisitInstanceNormalizationLayer(const IConnectableLayer* layer, - const InstanceNormalizationDescriptor& descriptor, - const char* name = nullptr) - { - IgnoreUnused(descriptor, name); - const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); - - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - } - }; - - const TensorShape tensorShape{ 1, 4, 4, 1 }; - const TensorInfo tensorInfo(tensorShape, DataType::Float32); + const TensorShape shape{ 1, 4, 4, 1 }; + const TensorInfo tensorInfo(shape, DataType::Float32); INetworkPtr network = INetwork::Create(); @@ -1512,59 +1135,11 @@ BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization) inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); instanceNormLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - // test QAsymmU8 quantization - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestInstanceNormalizationQuantization validatorQAsymmU8(tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - //test QAsymmS8 quantization - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestInstanceNormalizationQuantization validatorQAsymmS8(qAsymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestInstanceNormalizationQuantization validatorQSymmS8(qSymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - // test QSymmS16 quantization - const QuantizerOptions qSymmS16Options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16Options)->ExportNetwork(); - TestInstanceNormalizationQuantization validatorQSymmS16(qSymmS16Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax) { - class TestLogSoftmaxQuantization : public TestQuantization - { - public: - TestLogSoftmaxQuantization(const 
TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestLogSoftmaxQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitLogSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - } - }; - const TensorShape tensorShape{ 1U }; const TensorInfo tensorInfo(tensorShape, DataType::Float32); @@ -1583,28 +1158,7 @@ BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax) inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); logSoftmaxLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - // test QAsymmU8 quantization - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestLogSoftmaxQuantization validatorQAsymmU8(tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - // test QAsymmS8 quantization - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestLogSoftmaxQuantization validatorQAsymmS8(qAsymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestLogSoftmaxQuantization validatorQSymmS8(qSymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - // test QuantisedSymmS16 quantization - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestLogSoftmaxQuantization validatorQSymmS16(qSymmS16options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), tensorShape); } INetworkPtr CreateNetworkWithSoftmaxLayer(const SoftmaxDescriptor& descriptor, const TensorShape& shape) @@ -1630,57 +1184,13 @@ INetworkPtr CreateNetworkWithSoftmaxLayer(const SoftmaxDescriptor& descriptor, c BOOST_AUTO_TEST_CASE(QuantizeSoftmax) { - class TestSoftmaxQuantization : public TestQuantization - { - public: - TestSoftmaxQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestSoftmaxQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitSoftmaxLayer(const IConnectableLayer* layer, - const SoftmaxDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo 
info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off default static range [0.0f, 1.0f] - TestQuantizationParams(info, {1.0f / g_AsymmU8QuantizationBase, 0}, - {1.0f / g_AsymmS8QuantizationBase, -128}, - {1.0f / g_SymmS8QuantizationBase, 0}, - {1.0f / g_SymmS16QuantizationBase, 0}); - } - }; - SoftmaxDescriptor descriptor; descriptor.m_Beta = 1.0f; const TensorShape shape{1U}; INetworkPtr network = CreateNetworkWithSoftmaxLayer(descriptor, shape); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestSoftmaxQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestSoftmaxQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestSoftmaxQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestSoftmaxQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeStandIn) @@ -1763,26 +1273,6 @@ void CompleteLeakyReluNetwork(INetwork* network, BOOST_AUTO_TEST_CASE(QuantizePermute) { - class TestPermuteQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestPermuteQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) {} - - TestPermuteQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {} - - void VisitPermuteLayer(const IConnectableLayer* layer, - const PermuteDescriptor& desc, - const char* name = nullptr) override - { - IgnoreUnused(desc, name); - CheckForwardedQuantizationSettings(layer); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; @@ -1796,48 +1286,11 @@ BOOST_AUTO_TEST_CASE(QuantizePermute) CompleteLeakyReluNetwork(network.get(), activation, permute, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestPermuteQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestPermuteQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - 
TestPermuteQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestPermuteQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeSpaceToBatch) { - class TestSpaceToBatchQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestSpaceToBatchQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) {} - - TestSpaceToBatchQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {} - - void VisitSpaceToBatchNdLayer(const IConnectableLayer* layer, - const SpaceToBatchNdDescriptor& spaceToBatchNdDescriptor, - const char* name = nullptr) override - { - IgnoreUnused(spaceToBatchNdDescriptor, name); - CheckForwardedQuantizationSettings(layer); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; @@ -1851,54 +1304,11 @@ BOOST_AUTO_TEST_CASE(QuantizeSpaceToBatch) CompleteLeakyReluNetwork(network.get(), activation, spaceToBatch, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestSpaceToBatchQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestSpaceToBatchQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestSpaceToBatchQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestSpaceToBatchQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth) { - class TestSpaceToDepthQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestSpaceToDepthQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) - {} - - TestSpaceToDepthQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) - {} - - void VisitSpaceToDepthLayer(const IConnectableLayer* layer, - const SpaceToDepthDescriptor&, - const char* = nullptr) override - { - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - 
TestQuantizationParams(info, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0 }, - { 15.0f / g_SymmS8QuantizationBase, 0 }, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{ 1u }; @@ -1909,48 +1319,11 @@ BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth) CompleteLeakyReluNetwork(network.get(), activation, spaceToDepth, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestSpaceToDepthQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestSpaceToDepthQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestSpaceToDepthQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestSpaceToDepthQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizePooling2d) { - class TestPooling2dQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestPooling2dQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) {} - - TestPooling2dQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {} - - void VisitPooling2dLayer(const IConnectableLayer* layer, - const Pooling2dDescriptor& desc, - const char* name = nullptr) override - { - IgnoreUnused(desc, name); - CheckForwardedQuantizationSettings(layer); - } - }; - auto network = INetwork::Create(); TensorShape shape{1U}; @@ -1978,54 +1351,11 @@ BOOST_AUTO_TEST_CASE(QuantizePooling2d) activation->GetOutputSlot(0).SetTensorInfo(info); pooling2d->GetOutputSlot(0).SetTensorInfo(info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestPooling2dQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestPooling2dQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestPooling2dQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - 
VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestPooling2dQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeConstant) { - class TestConstantQuantization : public TestAdditionQuantization - { - public: - TestConstantQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestAdditionQuantization(inputShape, outputShape) {} - - TestConstantQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestAdditionQuantization(options, inputShape, outputShape) {} - - void VisitConstantLayer(const IConnectableLayer* layer, - const ConstTensor& input, - const char* name = nullptr) override - { - IgnoreUnused(input, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - // Based off the range of values in the const tensor used for the test: [-2.0f, 6.0f] - TestQuantizationParams(info, {8.0f / g_AsymmU8QuantizationBase, 64}, - {8.0f / g_AsymmS8QuantizationBase, -64}, - {6.0f / g_SymmS8QuantizationBase, 0}, - {6.0f / g_SymmS16QuantizationBase, 0}); - } - }; - INetworkPtr network = INetwork::Create(); // Constant layer data @@ -2050,68 +1380,11 @@ BOOST_AUTO_TEST_CASE(QuantizeConstant) addition->GetOutputSlot(0).SetTensorInfo(tensorInfo); constant->GetOutputSlot(0).SetTensorInfo(tensorInfo); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestConstantQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestConstantQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestConstantQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestConstantQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeArgMinMax) { - class TestArgMinMaxQuantization : public TestQuantization - { - public: - TestArgMinMaxQuantization(const TensorShape& inputShape, const TensorShape& outputShape) : - TestQuantization(inputShape, outputShape) {} - - TestArgMinMaxQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) : - TestQuantization(options, inputShape, outputShape) - {} - - void VisitInputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override - { - IgnoreUnused(layer, id, name); - } 
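The expected (scale, offset) pairs asserted throughout these tests follow one convention: the scale spreads the layer's static range over the integer range constant (the test file's g_AsymmU8QuantizationBase = 255, g_AsymmS8QuantizationBase = 255, g_SymmS8QuantizationBase = 127, g_SymmS16QuantizationBase = 32767), and for the asymmetric types the offset shifts the range minimum onto the lowest representable code. The standalone check below reproduces the expectations of the deleted TestConstantQuantization above, whose constant tensor spans [-2.0f, 6.0f]; the base constants are restated locally so the snippet compiles outside the test file.

#include <algorithm>
#include <cassert>
#include <cmath>

int main()
{
    const float min = -2.0f; // range of the constant tensor in QuantizeConstant
    const float max =  6.0f;

    // QAsymmU8: the scale covers the full range; the offset maps 'min' onto code 0.
    const float scaleAsymm = (max - min) / 255.0f;                          // == 8.0f / g_AsymmU8QuantizationBase
    const int   offsetU8   = static_cast<int>(std::round(-min / scaleAsymm)); // 63.75 -> 64

    // QAsymmS8: same scale, but the lowest code is -128 instead of 0.
    const int   offsetS8   = offsetU8 - 128;                                // -64

    // QSymmS8 / QSymmS16: symmetric about zero, so the offset is 0 and the
    // scale comes from the largest absolute value in the range.
    const float scaleSymS8  = std::max(std::fabs(min), std::fabs(max)) / 127.0f;   // 6.0f / 127
    const float scaleSymS16 = std::max(std::fabs(min), std::fabs(max)) / 32767.0f; // 6.0f / 32767

    // These are exactly the pairs the deleted validator expected:
    // {8.0f/255, 64}, {8.0f/255, -64}, {6.0f/127, 0}, {6.0f/32767, 0}.
    assert(offsetU8 == 64 && offsetS8 == -64);
    assert(scaleSymS8 == 6.0f / 127.0f && scaleSymS16 == 6.0f / 32767.0f);
    return 0;
}

The same arithmetic explains the {30.0f/base, 128} pairs that recur in these hunks: the default static range [-15.0f, 15.0f] gives a scale of 30/base and a QAsymmU8 offset of round(15 / (30/255)) = round(127.5) = 128.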
- - void VisitOutputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override - { - IgnoreUnused(layer, id, name); - } - void VisitArgMinMaxLayer(const IConnectableLayer* layer, - const ArgMinMaxDescriptor& argMinMaxDescriptor, - const char* name = nullptr) override - { - IgnoreUnused(argMinMaxDescriptor, name); - TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); - - TestQuantizationParams(outputInfo, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape inputShape{ 1, 1, 1, 5 }; @@ -2139,55 +1412,11 @@ BOOST_AUTO_TEST_CASE(QuantizeArgMinMax) input->GetOutputSlot(0).SetTensorInfo(inputInfo); argMinMaxLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestArgMinMaxQuantization validatorQAsymmU8(inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQSymmS8(qSymmS8Options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestArgMinMaxQuantization validatorQSymmS16(qSymmS16options, inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), inputShape, outputShape); } BOOST_AUTO_TEST_CASE(QuantizeComparison) { - class TestComparisonQuantization : public TestQuantization - { - public: - TestComparisonQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestComparisonQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void VisitComparisonLayer(const IConnectableLayer* layer, - const ComparisonDescriptor& descriptor, - const char* name = nullptr) override - { - IgnoreUnused(descriptor, name); - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - } - }; - const TensorShape tensorShape{ 1u }; const TensorInfo tensorInfo(tensorShape, DataType::Float32); @@ -2207,28 +1436,7 @@ BOOST_AUTO_TEST_CASE(QuantizeComparison) 
inputLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo); comparisonLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - // test QAsymmU8 quantization - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestComparisonQuantization validatorQAsymmU8(tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - // test QAsymmS8 quantization - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestComparisonQuantization validatorQAsymmS8(qAsymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - // test QSymmS8 quantization - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestComparisonQuantization validatorQSymmS8(qSymmS8Options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - // test QuantisedSymmS16 quantization - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestComparisonQuantization validatorQSymmS16(qSymmS16options, tensorShape, tensorShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), tensorShape); } BOOST_AUTO_TEST_CASE(QuantizeConcat) @@ -2244,38 +1452,42 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat) const TensorShape& outputShape) : TestQuantization(options, inputShape, outputShape) {} - void VisitInputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override - { - IgnoreUnused(layer, id, name); - } - void VisitOutputLayer(const IConnectableLayer* layer, - LayerBindingId id, - const char* name = nullptr) override + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const BaseDescriptor& descriptor, + const std::vector<armnn::ConstTensor>& constants, + const char* name, + const armnn::LayerBindingId id) override { - IgnoreUnused(layer, id, name); - } - void VisitConcatLayer(const IConnectableLayer* layer, - const OriginsDescriptor& originsDescriptor, - const char* name = nullptr) override - { - IgnoreUnused(originsDescriptor, name); - TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); - TestQuantizationParams( - outputInfo, {60.8f / g_AsymmU8QuantizationBase, 65}, + IgnoreUnused(name, constants, id, descriptor); + + switch (layer->GetType()) + { + case armnn::LayerType::Input : + break; + case armnn::LayerType::Output : + break; + case armnn::LayerType::Concat : + { + TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); + TestQuantizationParams( + outputInfo, {60.8f / g_AsymmU8QuantizationBase, 65}, {60.8f / g_SymmS8QuantizationBase, -63}, {45.3f / g_SymmS8QuantizationBase, 0}, {45.3f / g_SymmS16QuantizationBase, 0}); - TensorInfo inputInfo0 = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - TensorInfo inputInfo1 = layer->GetInputSlot(1).GetConnection()->GetTensorInfo(); - TensorInfo inputInfo2 = layer->GetInputSlot(2).GetConnection()->GetTensorInfo(); + TensorInfo inputInfo0 = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); + TensorInfo inputInfo1 = layer->GetInputSlot(1).GetConnection()->GetTensorInfo(); + TensorInfo inputInfo2 
= layer->GetInputSlot(2).GetConnection()->GetTensorInfo(); - TestDifferentQuantizationScale(inputInfo0, inputInfo1); - TestDifferentQuantizationScale(inputInfo0, inputInfo2); - TestDifferentQuantizationScale(inputInfo1, inputInfo2); - TestDifferentQuantizationScale(inputInfo0, outputInfo); + TestDifferentQuantizationScale(inputInfo0, inputInfo1); + TestDifferentQuantizationScale(inputInfo0, inputInfo2); + TestDifferentQuantizationScale(inputInfo1, inputInfo2); + TestDifferentQuantizationScale(inputInfo0, outputInfo); + break; + } + default: + {} + } } }; @@ -2341,26 +1553,6 @@ BOOST_AUTO_TEST_CASE(QuantizeConcat) BOOST_AUTO_TEST_CASE(QuantizeReshape) { - class TestReshapeQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestReshapeQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) {} - - TestReshapeQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {} - - virtual void VisitReshapeLayer(const IConnectableLayer* layer, - const ReshapeDescriptor& reshapeDescriptor, - const char* name = nullptr) override - { - IgnoreUnused(reshapeDescriptor, name); - CheckForwardedQuantizationSettings(layer); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; @@ -2374,48 +1566,11 @@ BOOST_AUTO_TEST_CASE(QuantizeReshape) CompleteLeakyReluNetwork(network.get(), activation, reshape, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestReshapeQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestReshapeQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestReshapeQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestReshapeQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeSplitter) { - class TestSplitterQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestSplitterQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) {} - - TestSplitterQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {} - - virtual void VisitSplitterLayer(const IConnectableLayer* layer, - const SplitterDescriptor& desc, - const char* name = nullptr) - { - IgnoreUnused(desc, name); - CheckForwardedQuantizationSettings(layer); - } 
- }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{3U}; @@ -2428,50 +1583,11 @@ BOOST_AUTO_TEST_CASE(QuantizeSplitter) IConnectableLayer* splitter = network->AddSplitterLayer(splitterDesc); CompleteLeakyReluNetwork(network.get(), activation, splitter, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestSplitterQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestSplitterQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestSplitterQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestSplitterQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); + TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeResize) { - class TestResizeQuantization : public TestLeakyReLuActivationQuantization - { - public: - TestResizeQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(inputShape, outputShape) - {} - - TestResizeQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) - {} - - void VisitResizeLayer(const IConnectableLayer* layer, - const ResizeDescriptor& resizeDescriptor, - const char* name = nullptr) override - { - IgnoreUnused(resizeDescriptor, name); - CheckForwardedQuantizationSettings(layer); - } - }; - INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; @@ -2487,48 +1603,11 @@ BOOST_AUTO_TEST_CASE(QuantizeResize) CompleteLeakyReluNetwork(network.get(), activation, resizeLayer, info); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestResizeQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestResizeQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestResizeQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), 

 BOOST_AUTO_TEST_CASE(QuantizeResize)
 {
-    class TestResizeQuantization : public TestLeakyReLuActivationQuantization
-    {
-    public:
-        TestResizeQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(inputShape, outputShape)
-        {}
-
-        TestResizeQuantization(const QuantizerOptions& options,
-                               const TensorShape& inputShape,
-                               const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(options, inputShape, outputShape)
-        {}
-
-        void VisitResizeLayer(const IConnectableLayer* layer,
-                              const ResizeDescriptor& resizeDescriptor,
-                              const char* name = nullptr) override
-        {
-            IgnoreUnused(resizeDescriptor, name);
-            CheckForwardedQuantizationSettings(layer);
-        }
-    };
-
     INetworkPtr network = INetwork::Create();

     const TensorShape shape{1U};
@@ -2487,48 +1603,11 @@ BOOST_AUTO_TEST_CASE(QuantizeResize)

     CompleteLeakyReluNetwork(network.get(), activation, resizeLayer, info);

-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestResizeQuantization validatorQAsymmU8(shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestResizeQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestResizeQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestResizeQuantization validatorQSymmS16(qSymmS16options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
+    TestNetwork(network.get(), shape);
 }

 BOOST_AUTO_TEST_CASE(QuantizeStridedSlice)
 {
-    class TestStridedSliceQuantization : public TestLeakyReLuActivationQuantization
-    {
-    public:
-        TestStridedSliceQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(inputShape, outputShape) {}
-
-        TestStridedSliceQuantization(const QuantizerOptions& options,
-                                     const TensorShape& inputShape,
-                                     const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {}
-
-        virtual void VisitStridedSliceLayer(const IConnectableLayer* layer,
-                                            const StridedSliceDescriptor& desc,
-                                            const char* name = nullptr)
-        {
-            IgnoreUnused(desc, name);
-            CheckForwardedQuantizationSettings(layer);
-        }
-    };
-
     INetworkPtr network = INetwork::Create();

     const TensorShape shape{3U};
@@ -2542,48 +1621,11 @@ BOOST_AUTO_TEST_CASE(QuantizeStridedSlice)

     CompleteLeakyReluNetwork(network.get(), activation, stridedSlice, info);

-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestStridedSliceQuantization validatorQAsymmU8(shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestStridedSliceQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestStridedSliceQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestStridedSliceQuantization validatorQSymmS16(qSymmS16options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
+    TestNetwork(network.get(), shape);
 }
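Each of these tests builds the same scaffold — input -> LeakyReLU activation -> layer under test -> output — so the layer under test always sees the activation's known dynamic range. A sketch of the wiring CompleteLeakyReluNetwork presumably performs; its definition is outside this diff, and the binding id passed to AddOutputLayer is an assumption:

// Sketch: chain the prepared activation into the layer under test and
// terminate the graph, propagating the tensor metadata.
void CompleteLeakyReluNetwork(INetwork* network,
                              IConnectableLayer* activation,
                              IConnectableLayer* layerUnderTest,
                              const TensorInfo& info)
{
    IConnectableLayer* output = network->AddOutputLayer(3); // binding id assumed

    activation->GetOutputSlot(0).Connect(layerUnderTest->GetInputSlot(0));
    layerUnderTest->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // The intermediate tensor keeps the activation's shape and data type.
    layerUnderTest->GetOutputSlot(0).SetTensorInfo(info);
}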

 BOOST_AUTO_TEST_CASE(QuantizeBatchToSpace)
 {
-    class TestBatchToSpaceQuantization : public TestLeakyReLuActivationQuantization
-    {
-    public:
-        TestBatchToSpaceQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(inputShape, outputShape) {}
-
-        TestBatchToSpaceQuantization(const QuantizerOptions& options,
-                                     const TensorShape& inputShape,
-                                     const TensorShape& outputShape)
-            : TestLeakyReLuActivationQuantization(options, inputShape, outputShape) {}
-
-        void VisitBatchToSpaceNdLayer(const IConnectableLayer* layer,
-                                      const BatchToSpaceNdDescriptor& batchToSpaceNdDescriptor,
-                                      const char* name = nullptr) override
-        {
-            IgnoreUnused(batchToSpaceNdDescriptor, name);
-            CheckForwardedQuantizationSettings(layer);
-        }
-    };
-
     INetworkPtr network = INetwork::Create();

     const TensorShape shape{1U};
@@ -2597,24 +1639,7 @@ BOOST_AUTO_TEST_CASE(QuantizeBatchToSpace)

     CompleteLeakyReluNetwork(network.get(), activation, batchToSpace, info);

-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestBatchToSpaceQuantization validatorQAsymmU8(shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestBatchToSpaceQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestBatchToSpaceQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestBatchToSpaceQuantization validatorQSymmS16(qSymmS16options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
+    TestNetwork(network.get(), shape);
 }
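The expected (scale, offset) pairs in the hunks below all derive from the default activation range [-15.0f, 15.0f] and the quantization bases the g_*QuantizationBase constants presumably hold (255 for the 8-bit types, 32767 for QSymmS16). Asymmetric schemes divide the full 30-unit width by the base and centre it with a non-zero offset; symmetric schemes divide the 15-unit half-range and keep offset 0. A sketch of the arithmetic:

#include <algorithm>
#include <cmath>
#include <utility>

// Asymmetric 8-bit: spread the full [min, max] width over 255 steps, shift by an offset.
std::pair<float, int> AsymmU8Params(float min, float max)
{
    const float scale  = (max - min) / 255.0f;                       // (15 - -15) / 255 = 30 / 255
    const int   offset = static_cast<int>(std::round(-min / scale)); // 15 * 255 / 30 = 127.5 -> 128
    return { scale, offset };
}

// Symmetric: only the absolute maximum matters, and the offset is always zero.
std::pair<float, int> SymmS16Params(float min, float max)
{
    const float absMax = std::max(std::abs(min), std::abs(max));
    return { absMax / 32767.0f, 0 };                                 // 15 / 32767, offset 0
}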

 BOOST_AUTO_TEST_CASE(QuantizePrelu)
@@ -2637,52 +1662,59 @@ BOOST_AUTO_TEST_CASE(QuantizePrelu)
         , m_AlphaShape(alphaShape)
         {}

-        void VisitInputLayer(const IConnectableLayer* layer,
-                             LayerBindingId id,
-                             const char* name = nullptr) override
+        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                             const BaseDescriptor& descriptor,
+                             const std::vector<armnn::ConstTensor>& constants,
+                             const char* name,
+                             const armnn::LayerBindingId id) override
         {
-            IgnoreUnused(id, name);
-            const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
+            IgnoreUnused(name, constants, id, descriptor);

-            switch (id)
+            switch (layer->GetType())
             {
-                case 0: // Input
-                    BOOST_TEST(m_InputShape == info.GetShape());
-                    break;
-                case 1: // Alpha
-                    BOOST_TEST(m_AlphaShape == info.GetShape());
-                    break;
-                default:
-                    throw InvalidArgumentException("Invalid layer binding id for PReLU layer");
+                case armnn::LayerType::Input :
+                {
+                    const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
+
+                    switch (id)
+                    {
+                        case 0: // Input
+                            BOOST_TEST(m_InputShape == info.GetShape());
+                            break;
+                        case 1: // Alpha
+                            BOOST_TEST(m_AlphaShape == info.GetShape());
+                            break;
+                        default:
+                            throw InvalidArgumentException("Invalid layer binding id for PReLU layer");
+                    }
+
+                    // Based off current default [-15.0f, 15.0f]
+                    TestQuantizationParams(info,
+                                           { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8
+                                           { 30.0f / g_AsymmS8QuantizationBase, 0},    // QASymmS8
+                                           { 15.0f / g_SymmS8QuantizationBase,  0},    // QSymmS8
+                                           { 15.0f / g_SymmS16QuantizationBase, 0 });  // QSymmS16
+                    break;
+                }
+                case armnn::LayerType::Output :
+                {
+                    const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
+                    BOOST_TEST(m_OutputShape == info.GetShape());
+                    break;
+                }
+                case armnn::LayerType::Prelu :
+                {
+                    const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
+                    TestQuantizationParams(info,
+                                           { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8
+                                           { 30.0f / g_AsymmS8QuantizationBase, 0},    // QAsymmS8
+                                           { 15.0f / g_SymmS8QuantizationBase,  0},    // QSymmS8
+                                           { 15.0f / g_SymmS16QuantizationBase, 0 });  // QSymmS16
+                    break;
+                }
+                default:
+                {}
             }
-
-            // Based off current default [-15.0f, 15.0f]
-            TestQuantizationParams(info,
-                                   { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8
-                                   { 30.0f / g_AsymmS8QuantizationBase, 0},    // QASymmS8
-                                   { 15.0f / g_SymmS8QuantizationBase,  0},    // QSymmS8
-                                   { 15.0f / g_SymmS16QuantizationBase, 0 });  // QSymmS16
-        }
-
-        void VisitOutputLayer(const IConnectableLayer* layer,
-                              LayerBindingId id,
-                              const char* name = nullptr) override
-        {
-            IgnoreUnused(id, name);
-            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
-            BOOST_TEST(m_OutputShape == info.GetShape());
-        }
-
-        void VisitPreluLayer(const IConnectableLayer* layer,
-                             const char* name = nullptr) override
-        {
-            IgnoreUnused(name);
-            const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
-            TestQuantizationParams(info,
-                                   { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8
-                                   { 30.0f / g_AsymmS8QuantizationBase, 0},    // QAsymmS8
-                                   { 15.0f / g_SymmS8QuantizationBase,  0},    // QSymmS8
-                                   { 15.0f / g_SymmS16QuantizationBase, 0 });  // QSymmS16
         }

     private:
@@ -2740,30 +1772,6 @@ BOOST_AUTO_TEST_CASE(QuantizePrelu)

 void TestQuantizeTransposeConvolution2d(bool useBiases)
 {
-    class TestTransposeConvolution2dQuantization : public TestQuantization
-    {
-    public:
-        TestTransposeConvolution2dQuantization(const TensorShape& inputShape, const TensorShape& outputShape) :
-            TestQuantization(inputShape, outputShape)
-        {}
-
-        TestTransposeConvolution2dQuantization(const QuantizerOptions& options,
-                                               const TensorShape& inputShape,
-                                               const TensorShape& outputShape) :
-            TestQuantization(options, inputShape, outputShape)
-        {}
-
-        void VisitTransposeConvolution2dLayer(const IConnectableLayer *layer,
-                                              const TransposeConvolution2dDescriptor& descriptor,
-                                              const ConstTensor& weights,
-                                              const Optional<ConstTensor>& biases,
-                                              const char *name = nullptr) override
-        {
-            IgnoreUnused(descriptor, name);
-            TestQuantizationOnLayersWithBiases(layer, weights, biases);
-        }
-    };
-
     INetworkPtr network = INetwork::Create();

     TensorShape shape{ 3 };
@@ -2794,28 +1802,7 @@ void TestQuantizeTransposeConvolution2d(bool useBiases)
     input->GetOutputSlot(0).SetTensorInfo(info);
     transposeConv2d->GetOutputSlot(0).SetTensorInfo(info);

-    // test QAsymmU8 quantization
-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestTransposeConvolution2dQuantization validatorQAsymmU8(shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    //test QAsymmS8 quantization
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestTransposeConvolution2dQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    // test QSymmS8 quantization
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestTransposeConvolution2dQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    // test QSymmS16 quantization
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestTransposeConvolution2dQuantization validatorQSymmS16(qSymmS16options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
+    TestNetwork(network.get(), shape);
 }
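The removed TransposeConvolution2d validator delegated to TestQuantizationOnLayersWithBiases. The property that helper family checks follows the usual quantization convention: biases are stored as Signed32 with scale = inputScale * weightScale and offset 0, so the convolution's integer accumulator can add them without rescaling. A hedged sketch of such a check — helper name and exact assertions are assumptions, not taken from this diff:

// Sketch: verify the standard bias quantization convention.
void CheckBiasQuantization(const TensorInfo& inputInfo,
                           const TensorInfo& weightInfo,
                           const TensorInfo& biasInfo)
{
    BOOST_TEST(biasInfo.GetDataType() == DataType::Signed32);
    BOOST_CHECK_EQUAL(biasInfo.GetQuantizationOffset(), 0);
    BOOST_CHECK_CLOSE(biasInfo.GetQuantizationScale(),
                      inputInfo.GetQuantizationScale() * weightInfo.GetQuantizationScale(),
                      0.000001f);
}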

 BOOST_AUTO_TEST_CASE(QuantizeTransposeConvolution2d)
@@ -2835,38 +1822,45 @@ BOOST_AUTO_TEST_CASE(QuantizeStack)
     public:
         TestStackQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
-        : TestQuantization(inputShape, outputShape) {}
+            : TestQuantization(inputShape, outputShape) {}

         TestStackQuantization(const QuantizerOptions& options,
                               const TensorShape& inputShape,
                               const TensorShape& outputShape)
-        : TestQuantization(options, inputShape, outputShape) {}
+            : TestQuantization(options, inputShape, outputShape) {}

-        void VisitInputLayer(const IConnectableLayer* layer,
-                             LayerBindingId id,
-                             const char* name = nullptr) override
+        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                             const BaseDescriptor& descriptor,
+                             const std::vector<armnn::ConstTensor>& constants,
+                             const char* name,
+                             const armnn::LayerBindingId id) override
         {
-            IgnoreUnused(layer, id, name);
-        }
-        void VisitOutputLayer(const IConnectableLayer* layer,
-                              LayerBindingId id,
-                              const char* name = nullptr) override
-        {
-            IgnoreUnused(layer, id, name);
-        }
+            IgnoreUnused(name, constants, id, descriptor);

-        void VisitStackLayer(const IConnectableLayer* layer,
-                             const StackDescriptor& descriptor,
-                             const char* name = nullptr) override
-        {
-            IgnoreUnused(descriptor, name);
-            TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo();
-
-            TestQuantizationParams(outputInfo,
-                { 30.0f / g_AsymmU8QuantizationBase, 128 },
-                { 30.0f / g_AsymmS8QuantizationBase, 0},
-                { 15.0f / g_SymmS8QuantizationBase, 0},
-                { 15.0f / g_SymmS16QuantizationBase, 0 });
+            switch (layer->GetType())
+            {
+                case armnn::LayerType::Input :
+                {
+                    break;
+                }
+                case armnn::LayerType::Output :
+                {
+                    break;
+                }
+                case armnn::LayerType::Stack :
+                {
+                    TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo();
+
+                    TestQuantizationParams(outputInfo,
+                        { 30.0f / g_AsymmU8QuantizationBase, 128 },
+                        { 30.0f / g_AsymmS8QuantizationBase, 0},
+                        { 15.0f / g_SymmS8QuantizationBase, 0},
+                        { 15.0f / g_SymmS16QuantizationBase, 0 });
+                    break;
+                }
+                default:
+                {}
+            }
         }
     };
@@ -2909,35 +1903,6 @@ BOOST_AUTO_TEST_CASE(QuantizeStack)

 BOOST_AUTO_TEST_CASE(QuantizeSlice)
 {
-    class TestSliceQuantization : public TestQuantization
-    {
-    public:
-        TestSliceQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
-            : TestQuantization(inputShape, outputShape)
-        {}
-
-        TestSliceQuantization(const QuantizerOptions& options,
-                              const TensorShape& inputShape,
-                              const TensorShape& outputShape)
-            : TestQuantization(options, inputShape, outputShape)
-        {}
-
-        virtual void VisitSliceLayer(const IConnectableLayer* layer,
-                                     const SliceDescriptor& desc,
-                                     const char* name = nullptr)
-        {
-            IgnoreUnused(desc, name);
-            const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
-
-            const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
-            const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
-            const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase,  0 };
-            const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };
-
-            TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
-        }
-    };
-
     TensorShape shape{ 3 };
     TensorInfo info(shape, DataType::Float32);
@@ -2953,28 +1918,7 @@ BOOST_AUTO_TEST_CASE(QuantizeSlice)
     inputLayer->GetOutputSlot(0).SetTensorInfo(info);
     sliceLayer->GetOutputSlot(0).SetTensorInfo(info);

-    // test QAsymmU8 quantization
-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestSliceQuantization validatorQAsymmU8(shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    // test QASymmS8 quantization
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestSliceQuantization validatorQAsymmS8(qAsymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    // test QSymmS8 quantization
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestSliceQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    // test QSymmS16 quantization
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestSliceQuantization validatorQSymmS16(qSymmS16options, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
+    TestNetwork(network.get(), shape);
 }
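TestQuantizationParams receives one expected (scale, offset) pair per target data type — OffsetScalePair is the std::pair<float, int> alias used by the removed TestSliceQuantization above — and compares the pair matching the type under test against the exported TensorInfo. A sketch of that dispatch, with the selection mechanism assumed:

// Sketch: pick the expected pair for the data type being validated and compare.
void TestQuantizationParamsSketch(const TensorInfo& info,
                                  const OffsetScalePair& qAsymmU8,
                                  const OffsetScalePair& qAsymmS8,
                                  const OffsetScalePair& qSymmS8,
                                  const OffsetScalePair& qSymmS16,
                                  DataType quantizedType)
{
    OffsetScalePair expected{ 0.0f, 0 };
    switch (quantizedType)
    {
        case DataType::QAsymmU8: expected = qAsymmU8; break;
        case DataType::QAsymmS8: expected = qAsymmS8; break;
        case DataType::QSymmS8:  expected = qSymmS8;  break;
        case DataType::QSymmS16: expected = qSymmS16; break;
        default: BOOST_FAIL("Unexpected quantized data type");
    }

    BOOST_TEST(info.GetDataType() == quantizedType);
    BOOST_CHECK_CLOSE(info.GetQuantizationScale(), expected.first, 0.000001f);
    BOOST_CHECK_EQUAL(info.GetQuantizationOffset(), expected.second);
}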

 std::vector<uint8_t> SetupQuantize(float value)
@@ -3002,50 +1946,55 @@ BOOST_AUTO_TEST_CASE(QuantizeNegativeInf)
     BOOST_CHECK_EQUAL(SetupQuantize(-1 * std::numeric_limits<float>::infinity())[0], 0);
 }

-class TestPreserveType : public TestAdditionQuantization
+class TestPreserveType : public TestQuantization
 {
 public:
     TestPreserveType(const QuantizerOptions& options,
                      const DataType& dataType,
                      const TensorShape& inputShape,
                      const TensorShape& outputShape)
-        : TestAdditionQuantization(options, inputShape, outputShape)
+        : TestQuantization(options, inputShape, outputShape)
         , m_DataType(dataType)
         , m_VisitedQuantizeLayer(false)
         , m_VisitedDequantizeLayer(false) {}

-    void VisitInputLayer(const IConnectableLayer* layer,
-                         LayerBindingId id,
-                         const char* name = nullptr) override
-    {
-        IgnoreUnused(id, name);
-        const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
-        BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
-        BOOST_TEST(m_InputShape == info.GetShape());
-    }
-
-    void VisitOutputLayer(const IConnectableLayer* layer,
-                          LayerBindingId id,
-                          const char* name = nullptr) override
-    {
-        IgnoreUnused(id, name);
-        const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
-        BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
-        BOOST_TEST(m_OutputShape == info.GetShape());
-    }
-
-    void VisitQuantizeLayer(const IConnectableLayer* layer,
-                            const char* name = nullptr) override
+    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                         const BaseDescriptor& descriptor,
+                         const std::vector<armnn::ConstTensor>& constants,
+                         const char* name,
+                         const armnn::LayerBindingId id) override
     {
-        IgnoreUnused(layer, name);
-        m_VisitedQuantizeLayer = true;
-    }
+        IgnoreUnused(name, constants, id, descriptor);

-    void VisitDequantizeLayer(const IConnectableLayer* layer,
-                              const char* name = nullptr) override
-    {
-        IgnoreUnused(layer, name);
-        m_VisitedDequantizeLayer = true;
+        switch (layer->GetType())
+        {
+            case armnn::LayerType::Input :
+            {
+                const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
+                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
+                BOOST_TEST(m_InputShape == info.GetShape());
+                break;
+            }
+            case armnn::LayerType::Output :
+            {
+                const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
+                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
+                BOOST_TEST(m_OutputShape == info.GetShape());
+                break;
+            }
+            case armnn::LayerType::Quantize :
+            {
+                m_VisitedQuantizeLayer = true;
+                break;
+            }
+            case armnn::LayerType::Dequantize :
+            {
+                m_VisitedDequantizeLayer = true;
+                break;
+            }
+            default:
+            {}
+        }
     }

     void CheckQuantizeDequantizeLayerVisited(bool expected)
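TestPreserveType covers the preserve-type option: the exported graph must keep Float32 tensors at its boundaries, which forces the quantizer to bracket the quantized core with a Quantize layer after each input and a Dequantize layer before each output — hence the two visited flags. A sketch of the expected usage, assuming a two-argument QuantizerOptions constructor whose second parameter enables type preservation:

// Sketch: export with preserveType and confirm the bracketing layers exist.
void CheckPreserveType(INetwork* network, const TensorShape& shape)
{
    const QuantizerOptions options(DataType::QAsymmU8, true); // true => keep Float32 I/O (assumed)
    INetworkPtr quantized = INetworkQuantizer::Create(network, options)->ExportNetwork();

    TestPreserveType validator(options, DataType::Float32, shape, shape);
    VisitLayersTopologically(quantized.get(), validator);

    // Both an inserted Quantize and an inserted Dequantize layer must have been seen.
    validator.CheckQuantizeDequantizeLayerVisited(true);
}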
@@ -3119,39 +2068,52 @@ BOOST_AUTO_TEST_CASE(PreserveTypeQsymm16)

 BOOST_AUTO_TEST_CASE(TestConnectionPreservationAfterDynamicQuant)
 {
-    class TestConnectionPreservation : public LayerVisitorBase<VisitorNoThrowPolicy>
+    class TestConnectionPreservation : public IStrategy
     {
     public:
         TestConnectionPreservation(const Graph& graph)
-            : LayerVisitorBase<VisitorNoThrowPolicy>()
-            , m_Graph(graph)
+            : m_Graph(graph)
         {}

-        void VisitAdditionLayer(const IConnectableLayer* layer, const char*) override
-        {
-            CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1");
-            CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2");
-        }
+        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+                             const BaseDescriptor& descriptor,
+                             const std::vector<armnn::ConstTensor>& constants,
+                             const char* name,
+                             const armnn::LayerBindingId id) override
+        {
+            IgnoreUnused(name, constants, id, descriptor);

-        void CheckLayerName(LayerGuid guid, std::string expectedName)
-        {
-            bool guidFound = false;
-            for (Layer* layer : m_Graph)
-            {
-                if (layer->GetGuid() == guid)
-                {
-                    BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str());
-                    guidFound = true;
-                    break;
-                }
-            }
-            if (!guidFound)
-            {
-                BOOST_FAIL("No layer matching the GUID was found");
-            }
-        }
-
+            switch (layer->GetType())
+            {
+                case armnn::LayerType::Addition :
+                {
+                    CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1");
+                    CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2");
+                    break;
+                }
+                default:
+                {}
+            }
+        }
+
+        void CheckLayerName(LayerGuid guid, std::string expectedName)
+        {
+            bool guidFound = false;
+            for (Layer* layer : m_Graph)
+            {
+                if (layer->GetGuid() == guid)
+                {
+                    BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str());
+                    guidFound = true;
+                    break;
+                }
+            }
+            if (!guidFound)
+            {
+                BOOST_FAIL("No layer matching the GUID was found");
+            }
+        }
     private:
         Graph m_Graph;
     };
@@ -3177,8 +2139,8 @@ BOOST_AUTO_TEST_CASE(TestConnectionPreservationAfterDynamicQuant)
     reLULayer2->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
     addLayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));

-    TestConnectionPreservation visitor1(PolymorphicDowncast<const Network*>(network.get())->GetGraph());
-    VisitLayersTopologically(network.get(), visitor1);
+    TestConnectionPreservation strategy1(PolymorphicDowncast<const Network*>(network.get())->GetGraph());
+    VisitLayersTopologically(network.get(), strategy1);

     armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());
@@ -3193,8 +2155,8 @@ BOOST_AUTO_TEST_CASE(TestConnectionPreservationAfterDynamicQuant)

     INetworkPtr quantNetwork = quantizer->ExportNetwork();

-    TestConnectionPreservation visitor2(PolymorphicDowncast<const Network*>(quantNetwork.get())->GetGraph());
-    VisitLayersTopologically(quantNetwork.get(), visitor2);
+    TestConnectionPreservation strategy2(PolymorphicDowncast<const Network*>(quantNetwork.get())->GetGraph());
+    VisitLayersTopologically(quantNetwork.get(), strategy2);
 }
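This last test exercises the dynamic path: the quantizer is fed concrete input data so it can observe real value ranges before export, and the ReLU -> Addition connections must survive both the refinement and the export. A sketch of the refine-then-export flow around the calls visible above, with tensor construction abbreviated and the input binding id assumed:

// Sketch: drive the dynamic quantizer with a sample input, then export.
armnn::INetworkPtr RefineAndExport(armnn::INetworkQuantizer& quantizer,
                                   const armnn::ConstTensor& sampleInput)
{
    // Binding id 0 is assumed to match the network's single input layer.
    armnn::InputTensors inputTensors{ { 0, sampleInput } };
    quantizer.Refine(inputTensors);

    // Export only after refinement, so the recorded ranges drive the chosen scales;
    // the exported graph is then revisited (strategy2 above) to check connections.
    return quantizer.ExportNetwork();
}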

 BOOST_AUTO_TEST_SUITE_END()