From 4a621c43174b6bdd9dc0bff839b245bc2139d6a6 Mon Sep 17 00:00:00 2001
From: Kevin May
Date: Thu, 22 Apr 2021 16:51:18 +0100
Subject: IVGCVSW-5719 Remove Quantizer

Signed-off-by: Kevin May
Change-Id: I8a29df03afdd6f1cc8413b219e179272cd2d51cf
---
 src/armnn/DynamicQuantizationStrategy.cpp |  276 ----
 src/armnn/DynamicQuantizationStrategy.hpp |   59 -
 src/armnn/NetworkQuantizationScheme.hpp   |  157 --
 src/armnn/NetworkQuantizer.cpp            |  190 ---
 src/armnn/NetworkQuantizer.hpp            |   61 -
 src/armnn/NetworkQuantizerUtils.cpp       |   43 -
 src/armnn/NetworkQuantizerUtils.hpp       |   70 -
 src/armnn/OverrideInputRangeVisitor.cpp   |   32 -
 src/armnn/OverrideInputRangeVisitor.hpp   |   93 --
 src/armnn/QuantizerStrategy.cpp           |  519 ------
 src/armnn/QuantizerStrategy.hpp           |   63 -
 src/armnn/StaticRangeStrategy.cpp         |  193 ---
 src/armnn/StaticRangeStrategy.hpp         |   41 -
 src/armnn/test/QuantizerTest.cpp          | 2220 -----------------------------
 14 files changed, 4017 deletions(-)
 delete mode 100644 src/armnn/DynamicQuantizationStrategy.cpp
 delete mode 100644 src/armnn/DynamicQuantizationStrategy.hpp
 delete mode 100644 src/armnn/NetworkQuantizationScheme.hpp
 delete mode 100644 src/armnn/NetworkQuantizer.cpp
 delete mode 100644 src/armnn/NetworkQuantizer.hpp
 delete mode 100644 src/armnn/NetworkQuantizerUtils.cpp
 delete mode 100644 src/armnn/NetworkQuantizerUtils.hpp
 delete mode 100644 src/armnn/OverrideInputRangeVisitor.cpp
 delete mode 100644 src/armnn/OverrideInputRangeVisitor.hpp
 delete mode 100644 src/armnn/QuantizerStrategy.cpp
 delete mode 100644 src/armnn/QuantizerStrategy.hpp
 delete mode 100644 src/armnn/StaticRangeStrategy.cpp
 delete mode 100644 src/armnn/StaticRangeStrategy.hpp
 delete mode 100644 src/armnn/test/QuantizerTest.cpp

diff --git a/src/armnn/DynamicQuantizationStrategy.cpp b/src/armnn/DynamicQuantizationStrategy.cpp
deleted file mode 100644
index d354a0e441..0000000000
--- a/src/armnn/DynamicQuantizationStrategy.cpp
+++ /dev/null
@@ -1,276 +0,0 @@
-//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT -// - -#include "DynamicQuantizationStrategy.hpp" -#include "NetworkUtils.hpp" - -#include -#include -#include -#include - -#include - -namespace armnn -{ -DynamicQuantizationStrategy::DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph) - : m_RangeTracker(rangeTracker), - m_Graph(graph) -{} - -void DynamicQuantizationStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) -{ - m_RangeTracker.SetRange(layer, outputIdx, min, max); -} - -void DynamicQuantizationStrategy::ForwardParentParameters(const IConnectableLayer* layer) -{ - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot *outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid previousLayerId = outputSlot->GetOwningLayerGuid(); - unsigned int ownerIndex = outputSlot->CalculateIndexOnOwner(); - const auto parentRange = m_RangeTracker.GetRange(previousLayerId, ownerIndex); - SetRange(layer, i, parentRange.first, parentRange.second); - } -} - -void DynamicQuantizationStrategy::AddToCalibratedLayers(const IConnectableLayer* layer) -{ - m_LayersToCalibrate.push_back(layer); -} - -void DynamicQuantizationStrategy::AddToNonCalibratedLayers(const IConnectableLayer* layer) -{ - m_LayersNotToCalibrate.push_back(layer); -} - -void DynamicQuantizationStrategy::FinishStrategy() -{ - for (const IConnectableLayer* layer : m_LayersToCalibrate) - { - std::vector newDebugLayers = InsertDebugLayerAfter( - m_Graph, *PolymorphicDowncast(const_cast(layer))); - // record them so we can take them out again efficiently afterward - m_DebugLayers.insert(std::end(m_DebugLayers), std::begin(newDebugLayers), std::end(newDebugLayers)); - } -} - -void DynamicQuantizationStrategy::RemoveDebugLayers() -{ - for (DebugLayer* debugLayer : m_DebugLayers) - { - OutputSlot& proceedingOutputSlot = *debugLayer->GetInputSlot(0).GetConnectedOutputSlot(); - proceedingOutputSlot.Disconnect(debugLayer->GetInputSlot(0)); - - for (InputSlot* succeedingInputSlot : debugLayer->GetOutputSlot(0).GetConnections()) - { - debugLayer->GetOutputSlot(0).Disconnect(*succeedingInputSlot); - proceedingOutputSlot.Connect(*succeedingInputSlot); - } - m_Graph.EraseLayer(debugLayer); - } - m_DebugLayers.clear(); -} - -void DynamicQuantizationStrategy::VisitNonCalibratedLayers() { - RemoveDebugLayers(); - for (const IConnectableLayer* layer : m_LayersNotToCalibrate) - { - ForwardParentParameters(layer); - } -} - - -void DynamicQuantizationStrategy::ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) -{ - IgnoreUnused(name); - IgnoreUnused(id); - IgnoreUnused(descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Activation : - { - const ActivationDescriptor& activationDescriptor = static_cast(descriptor); - switch (activationDescriptor.m_Function) - { - // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu - case ActivationFunction::Abs: - case ActivationFunction::Linear: - case ActivationFunction::ReLu: - case ActivationFunction::SoftReLu: - SetRange(layer, 0, 0.f, 15.f); - break; - case ActivationFunction::BoundedReLu: - SetRange(layer, 0, 0.f, activationDescriptor.m_A); - break; - case ActivationFunction::TanH: - SetRange(layer, 0, -1.f, 1.f); - break; - case ActivationFunction::LeakyReLu: - SetRange(layer, 0, -5.f, 15.f); - break; - default: - SetRange(layer, 0, -15.f, 15.f); - break; - } - break; - } - case 
armnn::LayerType::Addition : - { - SetRange(layer, 0, -20.f, 20.f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::ArgMinMax : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::BatchNormalization : - { - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Normalization: - { - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Convolution2d: - { - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::DepthwiseConvolution2d: - { - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::FullyConnected : - { - SetRange(layer, 0, -15.0f, 15.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Permute : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::SpaceToBatchNd : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::Pooling2d : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::Softmax : - { - SetRange(layer, 0, 0.f, 1.f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Constant : - { - if (constants[0].GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); - } - - // Work out the range based on the input constants - unsigned int inputNumElements = constants[0].GetNumElements(); - const float* inputData = reinterpret_cast(constants[0].GetMemoryArea()); - - float min = std::numeric_limits::max(); - float max = std::numeric_limits::lowest(); - - for (unsigned int i = 0; i < inputNumElements; i++) - { - const float inputValue = inputData[i]; - - min = std::min(min, inputValue); - max = std::max(max, inputValue); - } - SetRange(layer, 0, min, max); - break; - } - case armnn::LayerType::Concat : - { - float min = std::numeric_limits::max(); - float max = std::numeric_limits::lowest(); - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid layerId = outputSlot->GetOwningLayerGuid(); - unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); - RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); - min = std::min(min, range.first); - max = std::max(max, range.second); - } - SetRange(layer, 0, min, max); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Reshape : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::Splitter : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::Resize : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::StridedSlice : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::BatchToSpaceNd : - { - AddToNonCalibratedLayers(layer); - break; - } - case armnn::LayerType::Input : - { - SetRange(layer, 0, -0.0f, 0.0f); - AddToCalibratedLayers(layer); - break; - } - case armnn::LayerType::Output : - { - AddToNonCalibratedLayers(layer); - m_OutputLayers.push_back(id); - break; - } - default: - {} - } -} - -const std::vector& DynamicQuantizationStrategy::GetOutputLayers() -{ - return m_OutputLayers; -} - -} //namespace armnn diff --git a/src/armnn/DynamicQuantizationStrategy.hpp b/src/armnn/DynamicQuantizationStrategy.hpp deleted file mode 100644 index aa77a4b563..0000000000 --- 
a/src/armnn/DynamicQuantizationStrategy.hpp +++ /dev/null @@ -1,59 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/LayerVisitorBase.hpp" -#include "RangeTracker.hpp" -#include "layers/DebugLayer.hpp" - -#include -#include - -namespace armnn -{ - -/// Visitor class implementation to gather the TensorInfo for LayerBindingID for creation of ConstTensor for Refine. -class DynamicQuantizationStrategy : public armnn::IStrategy -{ -public: - - DynamicQuantizationStrategy(RangeTracker& rangeTracker, Graph& graph); - ~DynamicQuantizationStrategy() = default; - - virtual void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const armnn::BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id = 0) override; - - const std::vector& GetOutputLayers(); - void VisitNonCalibratedLayers(); - void FinishStrategy() override; - - -private: - /// Set the range for an output slot on a layer - void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); - - void ForwardParentParameters(const IConnectableLayer* layer); - - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_RangeTracker; - - Graph& m_Graph; - - std::vector m_LayersToCalibrate; - std::vector m_LayersNotToCalibrate; - std::vector m_DebugLayers; - - std::vector m_OutputLayers; - void AddToCalibratedLayers(const IConnectableLayer* layer); - void AddToNonCalibratedLayers(const IConnectableLayer* layer); - void RemoveDebugLayers(); - - -}; -} //namespace armnn diff --git a/src/armnn/NetworkQuantizationScheme.hpp b/src/armnn/NetworkQuantizationScheme.hpp deleted file mode 100644 index a78fd725b4..0000000000 --- a/src/armnn/NetworkQuantizationScheme.hpp +++ /dev/null @@ -1,157 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include
-
-#include
-#include
-
-namespace armnn
-{
-
-using OffsetScalePair = std::pair<float, int>;
-
-struct IQuantizationScheme
-{
-    virtual OffsetScalePair ComputeScheme(double min, double max) const = 0;
-
-    virtual int NumBits() const = 0;
-
-    virtual DataType GetDataType() const = 0;
-
-    virtual ~IQuantizationScheme() {}
-};
-
-struct QAsymmU8QuantizationScheme : IQuantizationScheme
-{
-    OffsetScalePair ComputeScheme(double min, double max) const override
-    {
-        if (min > max)
-        {
-            throw InvalidArgumentException("min > max will result in invalid quantization.");
-        }
-
-        double highest = (1 << NumBits()) - 1;
-
-        min = std::min(0.0, min); // min <= 0.0
-        max = std::max(0.0, max); // max >= 0.0
-
-        // To avoid dividing by zero when quantizing a zero filled tensor
-        if (min == 0.0 && max == 0.0)
-        {
-            max = 1.0;
-        }
-
-        // Assumes quantization range [0-highest]
-        double scale = (max-min) / highest;
-        double offset = -min / scale;
-
-        // Clamp offset [0-highest]
-        offset = std::max(0.0, std::min(highest, offset));
-
-        return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset)));
-    }
-
-    int NumBits() const override { return 8; }
-
-    DataType GetDataType() const override { return DataType::QAsymmU8; }
-};
-
-struct QAsymmS8QuantizationScheme : IQuantizationScheme
-{
-    OffsetScalePair ComputeScheme(double min, double max) const override
-    {
-        if (min > max)
-        {
-            throw InvalidArgumentException("min > max will result in invalid quantization.");
-        }
-
-        double highest = (1 << NumBits()) - 1;
-
-        min = std::min(0.0, min); // min <= 0.0
-        max = std::max(0.0, max); // max >= 0.0
-
-        // To avoid dividing by zero when quantizing a zero filled tensor
-        if (min == 0.0 && max == 0.0)
-        {
-            max = 1.0;
-        }
-
-        // Assumes quantization range [0-255]
-        double scale = (max-min) / highest;
-        double offset = -min / scale;
-
-        // Clamp offset [0-highest]
-        offset = std::max(0.0, std::min(highest, offset));
-
-        // -128 on offset to cast to signed range
-        return std::make_pair(static_cast<float>(scale), static_cast<int>(std::round(offset)-128));
-    }
-
-    int NumBits() const override { return 8; }
-
-    DataType GetDataType() const override { return DataType::QAsymmS8; }
-};
-
-struct QSymmS8QuantizationScheme : IQuantizationScheme
-{
-    OffsetScalePair ComputeScheme(double min, double max) const override
-    {
-        if (min > max)
-        {
-            throw InvalidArgumentException("min > max will result in invalid quantization.");
-        }
-
-        // To avoid dividing by zero when quantizing a zero filled tensor
-        if (min == 0.0 && max == 0.0)
-        {
-            max = 1.0;
-        }
-
-        double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit
-
-        double extent = std::max(std::abs(min), std::abs(max));
-        double scale = extent / highest;
-
-        return std::make_pair(static_cast<float>(scale), 0);
-    }
-
-    int NumBits() const override { return 8; }
-
-    DataType GetDataType() const override { return DataType::QSymmS8; }
-};
-
-struct QSymm16QuantizationScheme : IQuantizationScheme
-{
-    OffsetScalePair ComputeScheme(double min, double max) const override
-    {
-        if (min > max)
-        {
-            throw InvalidArgumentException("min > max will result in invalid quantization.");
-        }
-
-        // To avoid dividing by zero when quantizing a zero filled tensor
-        if (min == 0.0 && max == 0.0)
-        {
-            max = 1.0;
-        }
-
-        double highest = (1 << (NumBits()-1)) - 1; // (numbits-1) accounts for the sign bit
-
-        double extent = std::max(std::abs(min), std::abs(max));
-        double scale = extent / highest;
-
-        return
std::make_pair(static_cast(scale), 0); - - } - - int NumBits() const override { return 16; } - - DataType GetDataType() const override { return DataType::QSymmS16; } -}; - -} // namespace armnn diff --git a/src/armnn/NetworkQuantizer.cpp b/src/armnn/NetworkQuantizer.cpp deleted file mode 100644 index fd4486bc31..0000000000 --- a/src/armnn/NetworkQuantizer.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "NetworkQuantizer.hpp" -#include "NetworkQuantizerUtils.hpp" -#include "Graph.hpp" -#include "Layer.hpp" -#include "Network.hpp" -#include "DynamicQuantizationStrategy.hpp" -#include "StaticRangeStrategy.hpp" -#include "QuantizerStrategy.hpp" -#include "OverrideInputRangeVisitor.hpp" - -#include - -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include - -namespace armnn -{ - -using TContainer = - mapbox::util::variant, std::vector, std::vector, std::vector>; - -INetworkQuantizer* INetworkQuantizer::CreateRaw(INetwork* inputNetwork, const QuantizerOptions& options) -{ - return new NetworkQuantizer(inputNetwork, options); -} - -INetworkQuantizerPtr INetworkQuantizer::Create(INetwork* inputNetwork, const QuantizerOptions& options) -{ - return INetworkQuantizerPtr(CreateRaw(inputNetwork, options), &INetworkQuantizer::Destroy); -} - -void INetworkQuantizer::Destroy(INetworkQuantizer *quantizer) -{ - delete PolymorphicDowncast(quantizer); -} - -void NetworkQuantizer::OverrideInputRange(LayerBindingId layerId, float min, float max) -{ - const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph(); - auto inputLayers = graph.GetInputLayers(); - - // Walk the input layers of the graph and override the quantization parameters of the one with the given id - OverrideInputRangeVisitor overrideInputRangeVisitor(m_Ranges, layerId, RangeTracker::MinMaxRange{min, max}); - VisitLayers(inputLayers, overrideInputRangeVisitor); -} - -void NetworkQuantizer::Refine(const InputTensors& inputTensors) -{ - // The first time Refine is called the m_Runtime and the DynamicQuantizationStrategy - // will not have been created. Need to get the environment set up, Runtime loaded, - // DynamicQuantizationStrategy created and run over the network to initialise itself - // and the RangeTracker the Debug callback registered and an initial inference - // done to set up the first min/max values - if (!m_Runtime) - { - m_RefineCount = 0; - m_Ranges.SetDynamicMode(true); - const Graph& cGraph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort(); - - // need to insert Debug layers in the DynamicQuantizationStrategy - Graph& graph = const_cast(cGraph); - - // Initialize RangeTracker to the default values for each layer. - // The default values are overwritten by the min/max that is - // recorded during the first dataset min/max calibration. This - // initialisation is only required for the first call of Refine(). 
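    // A minimal sketch of the min/max refinement that the debug callback
    // registered below performs on every calibration pass: the first batch
    // resets the range, later batches only widen it. Names and values here
    // are illustrative, not part of the original code; assumes <algorithm>
    // and <utility> are available.
    {
        std::pair<float, float> range{0.0f, 0.0f};
        auto refine = [&range](float observedMin, float observedMax)
        {
            range.first  = std::min(range.first,  observedMin);  // cf. RefineMin
            range.second = std::max(range.second, observedMax);  // cf. RefineMax
        };
        refine(-1.5f, 4.0f); // after batch 1: {-1.5f, 4.0f}
        refine(-0.5f, 6.0f); // after batch 2: {-1.5f, 6.0f}, only the max widened
    }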
- m_DynamicQuantizationStrategy = DynamicQuantizationStrategy(m_Ranges, graph); - ApplyStrategyToLayers(cGraph, m_DynamicQuantizationStrategy.value()); - - IRuntime::CreationOptions options; - m_Runtime = IRuntime::Create(options); - - // Optimize network - debug already enabled for layers that require quantization - OptimizerOptions optimizerOptions(false, false); - std::vector backends = {"CpuRef"}; - IOptimizedNetworkPtr optimizedNet = Optimize(*m_InputNetwork, - backends, - m_Runtime->GetDeviceSpec(), - optimizerOptions); - - m_Runtime->LoadNetwork(m_NetworkId, std::move(optimizedNet)); - - // Debug callback function to refine min/max in RangeTracker - auto rangeTrackerCallback = [&](LayerGuid guid, unsigned int slotIndex, ITensorHandle *tensorHandle) { - // Get min/max pair from tensor data - std::pair minMax = armnnUtils::FindMinMax(tensorHandle); - - // For first calibration dataset, set min/max range in RangeTracker to - // min/max ranges gathered during inference - if (m_RefineCount == 0) - { - m_Ranges.ResetMinMax(guid, slotIndex, minMax.first, minMax.second); - } - else - { - // For every other calibration dataset, only set min/max range if the - // values gathered are less than / greater than originally recorded. - m_Ranges.RefineMin(guid, slotIndex, minMax.first); - m_Ranges.RefineMax(guid, slotIndex, minMax.second); - } - }; - - m_Runtime->RegisterDebugCallback(m_NetworkId, rangeTrackerCallback); - } - - // Create output tensor for EnqueueWorkload - std::vector outputBindings; - auto outputLayers = m_DynamicQuantizationStrategy.value().GetOutputLayers(); - std::vector outputVectors; - for (auto outputLayerBindingId : outputLayers) - { - auto outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, outputLayerBindingId); - outputBindings.push_back(std::make_pair(outputLayerBindingId, outputTensorInfo)); - outputVectors.push_back(std::vector(outputTensorInfo.GetNumElements(), 0)); - } - OutputTensors outputTensors = armnnUtils::MakeOutputTensors(outputBindings, outputVectors); - - // Execute EnqueueWorkload with calibration image - m_Runtime->EnqueueWorkload(m_NetworkId, inputTensors, outputTensors); - ++m_RefineCount; -} - -INetworkPtr NetworkQuantizer::ExportNetwork() -{ - const Graph& graph = m_InputNetwork->pNetworkImpl->GetGraph().TopologicalSort(); - - // Step 1) Walk the graph and populate default min/max values for - // intermediate tensors, only if Runtime does not exist (created - // if Refine has been called) - if (!m_Runtime) - { - m_Ranges.SetDynamicMode(false); - StaticRangeStrategy rangeStrategy(m_Ranges); - ApplyStrategyToLayers(graph, rangeStrategy); - } - else - { - // Set min/max range of non-calibrated layers to parent layer's range - m_DynamicQuantizationStrategy.value().VisitNonCalibratedLayers(); - // now tear down the runtime and the dynamic visitor. 
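    // A minimal worked example of the QAsymmU8 scheme selected in Step 2
    // below, following the arithmetic in NetworkQuantizationScheme.hpp;
    // the sample range [-3.0, 5.0] is illustrative only.
    {
        double min = -3.0, max = 5.0;        // recorded calibration range
        min = std::min(0.0, min);            // range is forced to straddle zero
        max = std::max(0.0, max);
        double scale  = (max - min) / 255.0; // 8.0 / 255 ~= 0.03137
        double offset = std::max(0.0, std::min(255.0, -min / scale)); // 95.625, rounds to 96
    }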
- m_Runtime.reset(nullptr); - m_DynamicQuantizationStrategy = EmptyOptional(); - m_RefineCount = 0; - } - - // Step 2) Convert input InputNetwork to Quantized InputNetwork - std::unique_ptr quantizationScheme; - switch (m_Options.m_ActivationFormat) - { - case DataType::QAsymmU8: - quantizationScheme = std::make_unique(); - break; - case DataType::QAsymmS8: - quantizationScheme = std::make_unique(); - break; - case DataType::QSymmS8: - quantizationScheme = std::make_unique(); - break; - case DataType::QSymmS16: - quantizationScheme = std::make_unique(); - break; - default: - throw InvalidArgumentException("Unsupported quantization target"); - } - - QuantizerStrategy quantizerVisitor(m_Ranges, quantizationScheme.get(), m_Options.m_PreserveType); - ApplyStrategyToLayers(graph, quantizerVisitor); - - // clear the ranges - m_Ranges.Reset(); - - return quantizerVisitor.RetrieveFinalNetwork(); -} - -} //namespace armn diff --git a/src/armnn/NetworkQuantizer.hpp b/src/armnn/NetworkQuantizer.hpp deleted file mode 100644 index a07ac8827e..0000000000 --- a/src/armnn/NetworkQuantizer.hpp +++ /dev/null @@ -1,61 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include -#include -#include -#include -#include - -#include "DynamicQuantizationStrategy.hpp" -#include "RangeTracker.hpp" - -namespace armnn -{ - -class NetworkQuantizer : public INetworkQuantizer -{ -public: - NetworkQuantizer(INetwork* inputNetwork, const QuantizerOptions& options) - : m_InputNetwork(inputNetwork), - m_NetworkId(0), - m_Runtime(nullptr, &IRuntime::Destroy), - m_RefineCount(0), - m_Options(options) {} - - void OverrideInputRange(LayerBindingId layerId, float min, float max) override; - void Refine(const InputTensors& inputTensors) override; - - // Required for testing? Need some way to get min/max in RangeTracker (m_Ranges) - std::pair GetMinMaxRange(LayerGuid guid, unsigned int idx) { return m_Ranges.GetRange(guid, idx); } - INetworkPtr ExportNetwork() override; - -private: - /// Original input network to quantize - INetwork* m_InputNetwork; - - NetworkId m_NetworkId; - - // if we are run in dynamic mode this unique pointer will hold - // the runtime between invocations of the Refine method. - IRuntimePtr m_Runtime; - - Optional m_DynamicQuantizationStrategy; - - // counts the number of times refine is called - unsigned int m_RefineCount; - - /// Mapping from Guid to an array of ranges for outputs - RangeTracker m_Ranges; - - /// Options for the NetworkQuantizer - QuantizerOptions m_Options; - - std::pair FindMinMax(ITensorHandle* tensorHandle); -}; - -} //namespace armnn diff --git a/src/armnn/NetworkQuantizerUtils.cpp b/src/armnn/NetworkQuantizerUtils.cpp deleted file mode 100644 index dd0affde25..0000000000 --- a/src/armnn/NetworkQuantizerUtils.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "NetworkQuantizerUtils.hpp" - -#include -#include -#include - -namespace armnn -{ - -ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector& backing) -{ - float scale = 0.0f; - int offset = 0; - - // Reserve the backing memory - backing.resize(tensor.GetInfo().GetNumElements()); - - DataType type = tensor.GetInfo().GetDataType(); - switch(type) - { - case DataType::Float32: - { - QuantizeConstant(static_cast(tensor.GetMemoryArea()), - backing.data(), - backing.size(), - scale, - offset); - } - break; - default: - ARMNN_ASSERT_MSG(false, "Can't quantize unsupported data type"); - } - - TensorInfo qInfo(tensor.GetInfo().GetShape(), DataType::QAsymmU8, scale, offset); - return ConstTensor(qInfo, backing); -} - -} // namespace armnn diff --git a/src/armnn/NetworkQuantizerUtils.hpp b/src/armnn/NetworkQuantizerUtils.hpp deleted file mode 100644 index 5497e1b898..0000000000 --- a/src/armnn/NetworkQuantizerUtils.hpp +++ /dev/null @@ -1,70 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NetworkQuantizationScheme.hpp" - -#include -#include -#include -#include -#include - -#include -#include - -namespace armnn -{ - -template -void QuantizeConstant(const srcType* src, uint8_t* dst, size_t numElements, float& scale, int& offset) -{ - ARMNN_ASSERT(src); - ARMNN_ASSERT(dst); - - float min = std::numeric_limits::max(); - float max = std::numeric_limits::lowest(); - for (size_t i = 0; i < numElements; ++i) - { - min = std::min(min, src[i]); - max = std::max(max, src[i]); - } - - QAsymmU8QuantizationScheme quantizationScheme; - OffsetScalePair qParams = quantizationScheme.ComputeScheme(min, max); - scale = qParams.first; - offset = qParams.second; - - for (size_t i = 0; i < numElements; ++i) - { - dst[i] = armnn::Quantize(src[i], scale, offset); - } -} - -ConstTensor CreateQuantizedConst(const ConstTensor& tensor, std::vector& backing); - -template -void VisitLayers(const LayerContainer& layerContainer, ILayerVisitor& visitor) -{ - visitor.StartVisit(); - for (auto layer : layerContainer) - { - layer->Accept(visitor); - } - visitor.FinishVisit(); -} - -template -void ApplyStrategyToLayers(const LayerContainer& layerContainer, IStrategy& strategy) -{ - for (auto layer : layerContainer) - { - layer->ExecuteStrategy(strategy); - } - strategy.FinishStrategy(); -} - -} // namespace armnn diff --git a/src/armnn/OverrideInputRangeVisitor.cpp b/src/armnn/OverrideInputRangeVisitor.cpp deleted file mode 100644 index 6e5137b794..0000000000 --- a/src/armnn/OverrideInputRangeVisitor.cpp +++ /dev/null @@ -1,32 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "OverrideInputRangeVisitor.hpp" -#include "NetworkQuantizerUtils.hpp" -#include "Layer.hpp" - -#include - -namespace armnn -{ - -OverrideInputRangeVisitor::OverrideInputRangeVisitor(RangeTracker& ranges, - LayerBindingId layerId, - const MinMaxRange& minMaxRange) - : m_Ranges(ranges) - , m_LayerId(layerId) - , m_MinMaxRange(minMaxRange) -{} - -void OverrideInputRangeVisitor::VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name) -{ - IgnoreUnused(name); - if (m_LayerId == id) - { - m_Ranges.SetRange(layer, 0, m_MinMaxRange.first, m_MinMaxRange.second); - } -} - -} // namespace armnn diff --git a/src/armnn/OverrideInputRangeVisitor.hpp b/src/armnn/OverrideInputRangeVisitor.hpp deleted file mode 100644 index 196a3aab1d..0000000000 --- a/src/armnn/OverrideInputRangeVisitor.hpp +++ /dev/null @@ -1,93 +0,0 @@ -// -// Copyright © 2017 Arm Ltd. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "NetworkQuantizer.hpp" -#include "armnn/LayerVisitorBase.hpp" -#include "RangeTracker.hpp" - -#include - -namespace armnn -{ -class OverrideInputRangeStrategy : public IStrategy -{ -private: - using MinMaxRange = RangeTracker::MinMaxRange; -public : - OverrideInputRangeStrategy(RangeTracker& ranges, - LayerBindingId layerId, - const MinMaxRange& minMaxRange) - : m_Ranges(ranges) - , m_LayerId(layerId) - , m_MinMaxRange(minMaxRange){} - - ~OverrideInputRangeStrategy() = default; - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(name, constants, id, descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Input : - { - if (m_LayerId == id) - { - m_Ranges.SetRange(layer, 0, m_MinMaxRange.first, m_MinMaxRange.second); - } - break; - } - default: - { - std::cout << "dont know this one" << std::endl; - } - } - } - -private: - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_Ranges; - - /// The id of the input layer of which to override the input range - LayerBindingId m_LayerId; - - /// The new input range to be applied to the input layer - MinMaxRange m_MinMaxRange; -}; - - - -/// Visitor object for overriding the input range of the quantized input layers in a network -class OverrideInputRangeVisitor : public LayerVisitorBase -{ -private: - using MinMaxRange = RangeTracker::MinMaxRange; - -public: - OverrideInputRangeVisitor(RangeTracker& ranges, - LayerBindingId layerId, - const MinMaxRange& minMaxRange); - ~OverrideInputRangeVisitor() = default; - - void VisitInputLayer(const IConnectableLayer* layer, LayerBindingId id, const char* name = nullptr) override; - -private: - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_Ranges; - - /// The id of the input layer of which to override the input range - LayerBindingId m_LayerId; - - /// The new input range to be applied to the input layer - MinMaxRange m_MinMaxRange; -}; - -} // namespace armnn diff --git a/src/armnn/QuantizerStrategy.cpp b/src/armnn/QuantizerStrategy.cpp deleted file mode 100644 index df20749072..0000000000 --- a/src/armnn/QuantizerStrategy.cpp +++ /dev/null @@ -1,519 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "QuantizerStrategy.hpp" -#include "armnn/utility/PolymorphicDowncast.hpp" - -namespace armnn -{ - -QuantizerStrategy::QuantizerStrategy(const RangeTracker& rangeTracker, - const IQuantizationScheme* quantizationScheme, - bool preserveType) - : m_Ranges(rangeTracker) - , m_QuantizedNetwork(INetwork::Create()) - , m_QuantizationScheme(quantizationScheme) - , m_PreserveType(preserveType) -{ -} - -void QuantizerStrategy::SetQuantizedInputConnections(const IConnectableLayer* srcLayer, - IConnectableLayer* quantizedLayer) -{ - ARMNN_ASSERT(srcLayer); - for (unsigned int i = 0; i < srcLayer->GetNumInputSlots(); i++) - { - const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(i); - const InputSlot* inputSlot = static_cast(&srcInputSlot); - ARMNN_ASSERT(inputSlot); - const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); - - ARMNN_ASSERT(outputSlot); - unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); - Layer& layerToFind = outputSlot->GetOwningLayer(); - - auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); - if (found == m_OriginalToQuantizedGuidMap.end()) - { - // Error in graph traversal order - ARMNN_ASSERT_MSG(false, "Error in graph traversal"); - return; - } - - // Connect the slots in the quantized model - IConnectableLayer* prevQuantizedLayer = m_QuantizedGuidToLayerMap[found->second]; - IInputSlot& newInputSlot = quantizedLayer->GetInputSlot(i); - IOutputSlot& newOutputSlot = prevQuantizedLayer->GetOutputSlot(slotIdx); - newOutputSlot.Connect(newInputSlot); - TensorInfo info(outputSlot->GetTensorInfo()); - - // Only try to set quantization params on tensors that can be quantized - if (inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Boolean && - inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed32 && - inputSlot->GetConnectedOutputSlot()->GetTensorInfo().GetDataType() != DataType::Signed64) - { - // Fetch the min/max ranges that were computed earlier - auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); - OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); - info.SetDataType(m_QuantizationScheme->GetDataType()); - info.SetQuantizationOffset(qParams.second); - info.SetQuantizationScale(qParams.first); - } - newOutputSlot.SetTensorInfo(info); - } -} - -ConstTensor QuantizerStrategy::CreateQuantizedBias(const IConnectableLayer* srcLayer, - const ConstTensor& weights, - const Optional& biases, - std::vector& backing) -{ - ARMNN_ASSERT(srcLayer); - const IInputSlot& srcInputSlot = srcLayer->GetInputSlot(0); - auto inputSlot = static_cast(&srcInputSlot); - ARMNN_ASSERT(inputSlot); - const OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot(); - - ARMNN_ASSERT(outputSlot); - unsigned int slotIdx = outputSlot->CalculateIndexOnOwner(); - Layer& layerToFind = outputSlot->GetOwningLayer(); - - auto found = m_OriginalToQuantizedGuidMap.find(layerToFind.GetGuid()); - if (found == m_OriginalToQuantizedGuidMap.end()) - { - // Error in graph traversal order - ARMNN_ASSERT_MSG(false, "Error in graph traversal"); - return biases.value(); - } - - // Fetch the min/max ranges that were computed earlier - auto range = m_Ranges.GetRange(layerToFind.GetGuid(), slotIdx); - OffsetScalePair qParams = m_QuantizationScheme->ComputeScheme(range.first, range.second); - - // Get the quantization scale based on input and weight scale - float scale = qParams.first * weights.GetInfo().GetQuantizationScale(); - 
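    // A hedged numeric check of the rule just applied: biasScale equals
    // inputScale * weightScale, and the bias is re-expressed as int32 at
    // offset 0. The concrete values are illustrative, not taken from any
    // real network; assumes <cmath> for std::lround.
    {
        float inputScale  = 0.05f;
        float weightScale = 0.002f;
        float biasScale   = inputScale * weightScale; // 0.0001f
        float fp32Bias    = 0.025f;
        auto  qBias       = static_cast<int32_t>(std::lround(fp32Bias / biasScale)); // 250
    }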
- // Set up quantized bias tensor info and allocate space - TensorInfo qInfo(biases.value().GetInfo().GetShape(), DataType::Signed32, scale, 0); - backing.resize(biases.value().GetInfo().GetNumElements()); - - // Convert values to int32 - for (size_t i = 0; i < backing.size(); ++i) - { - float fp32Value = static_cast(biases.value().GetMemoryArea())[i]; - backing[i] = armnn::numeric_cast(fp32Value * ( 1 / scale )); - } - - return ConstTensor(qInfo, backing); -} - -void QuantizerStrategy::RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer) -{ - m_OriginalToQuantizedGuidMap.insert(std::make_pair(srcLayer->GetGuid(), quantizedLayer->GetGuid())); - m_QuantizedGuidToLayerMap.insert(std::make_pair(quantizedLayer->GetGuid(), quantizedLayer)); -} - -void QuantizerStrategy::ExecuteStrategy(const armnn::IConnectableLayer *layer, - const BaseDescriptor& descriptor, - const std::vector &constants, - const char *name, - const armnn::LayerBindingId id) -{ - IgnoreUnused(id); - - IConnectableLayer* newLayer; - - switch (layer->GetType()) - { - case armnn::LayerType::Addition : - { - newLayer = m_QuantizedNetwork->AddAdditionLayer(name); - break; - } - case armnn::LayerType::Activation : - { - const ActivationDescriptor& activationDescriptor = static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddActivationLayer(activationDescriptor, name); - break; - } - case armnn::LayerType::ArgMinMax : - { - ArgMinMaxDescriptor argMinMaxDescriptor = static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddArgMinMaxLayer(argMinMaxDescriptor, name); - break; - } - case armnn::LayerType::BatchNormalization : - { - - BatchNormalizationDescriptor batchNormalizationDescriptor = - static_cast(descriptor); - std::vector meanBacking; - ConstTensor qMean = CreateQuantizedConst(constants[0], meanBacking); - - std::vector varianceBacking; - ConstTensor qVariance = CreateQuantizedConst(constants[1], varianceBacking); - - std::vector betaBacking; - ConstTensor qBeta = CreateQuantizedConst(constants[2], betaBacking); - - std::vector gammaBacking; - ConstTensor qGamma = CreateQuantizedConst(constants[3], gammaBacking); - - newLayer = m_QuantizedNetwork->AddBatchNormalizationLayer(batchNormalizationDescriptor, - qMean, - qVariance, - qBeta, - qGamma, - name); - break; - } - case armnn::LayerType::BatchToSpaceNd : - { - BatchToSpaceNdDescriptor batchToSpaceNdDescriptor = - static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddBatchToSpaceNdLayer(batchToSpaceNdDescriptor, name); - break; - } - case armnn::LayerType::Comparison : - { - ComparisonDescriptor comparisonDescriptor =static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddComparisonLayer(comparisonDescriptor, name); - break; - } - case armnn::LayerType::Concat : - { - OriginsDescriptor originsDescriptor = static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddConcatLayer(originsDescriptor, name); - break; - } - case armnn::LayerType::Constant : - { - std::vector inputBacking; - ConstTensor qInput = CreateQuantizedConst(constants[0], inputBacking); - - newLayer = m_QuantizedNetwork->AddConstantLayer(qInput, name); - break; - } - case armnn::LayerType::Convolution2d : - { - const armnn::Optional biases = constants.size() == 1 ? 
- armnn::Optional{} : - armnn::Optional(constants[1]); - - std::vector weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); - Optional optionalQBiases; - std::vector biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional(qBiases); - } - Convolution2dDescriptor convolution2dDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddConvolution2dLayer(convolution2dDescriptor, - qWeights, - optionalQBiases, - name); - break; - } - case armnn::LayerType::DepthToSpace : - { - DepthToSpaceDescriptor depthToSpaceDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddDepthToSpaceLayer(depthToSpaceDescriptor, name); - break; - } - case armnn::LayerType::DepthwiseConvolution2d : - { - DepthwiseConvolution2dDescriptor depthwiseConvolution2dDescriptor = - static_cast(descriptor); - - const armnn::Optional biases = constants.size() == 1 ? - armnn::Optional{} : - armnn::Optional(constants[1]); - - std::vector weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); - Optional optionalQBiases; - std::vector biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional(qBiases); - } - - newLayer = m_QuantizedNetwork->AddDepthwiseConvolution2dLayer( - depthwiseConvolution2dDescriptor, - qWeights, - optionalQBiases, - name); - break; - } - case armnn::LayerType::ElementwiseUnary : - { - ElementwiseUnaryDescriptor elementwiseUnaryDescriptor = - static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddElementwiseUnaryLayer(elementwiseUnaryDescriptor, name); - break; - } - case armnn::LayerType::Fill : - { - FillDescriptor fillDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddFillLayer(fillDescriptor, name); - break; - } - case armnn::LayerType::FullyConnected : - { - FullyConnectedDescriptor fullyConnectedDescriptor = - static_cast(descriptor); - - const armnn::Optional biases = constants.size() == 1 ? 
- armnn::Optional{} : - armnn::Optional(constants[1]); - - std::vector weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); - Optional optionalQBiases; - std::vector biasesBacking; - - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional(qBiases); - } - - newLayer = m_QuantizedNetwork->AddFullyConnectedLayer(fullyConnectedDescriptor, - qWeights, - optionalQBiases, - name); - break; - } - case armnn::LayerType::Input : - { - const DataType dataType = layer->GetOutputSlot(0).GetTensorInfo().GetDataType(); - IConnectableLayer* inputLayer = m_QuantizedNetwork->AddInputLayer(id, name); - - if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) - { - IConnectableLayer* quantizeLayer = m_QuantizedNetwork->AddQuantizeLayer(); - inputLayer->GetOutputSlot(0).Connect(quantizeLayer->GetInputSlot(0)); - inputLayer->GetOutputSlot(0).SetTensorInfo(layer->GetOutputSlot(0).GetTensorInfo()); - RecordLayer(layer, quantizeLayer); - return; - } - else - { - RecordLayer(layer, inputLayer); - return; - } - } - case armnn::LayerType::InstanceNormalization : - { - InstanceNormalizationDescriptor instanceNormalizationDescriptor = - static_cast(descriptor); - - newLayer = - m_QuantizedNetwork->AddInstanceNormalizationLayer(instanceNormalizationDescriptor, name); - break; - } - case armnn::LayerType::LogSoftmax : - { - LogSoftmaxDescriptor logSoftmaxDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddLogSoftmaxLayer(logSoftmaxDescriptor, name); - break; - } - case armnn::LayerType::Mean : - { - MeanDescriptor meanDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddMeanLayer(meanDescriptor, name); - break; - } - case armnn::LayerType::Multiplication : - { - newLayer = m_QuantizedNetwork->AddMultiplicationLayer(name); - break; - } - case armnn::LayerType::Normalization : - { - NormalizationDescriptor normalizationDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddNormalizationLayer(normalizationDescriptor, name); - break; - } - case armnn::LayerType::Output : - { - const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - const DataType& dataType = info.GetDataType(); - newLayer = m_QuantizedNetwork->AddOutputLayer(id, name); - - if (m_PreserveType && (dataType == DataType::Float32 || dataType == DataType::Float16)) - { - IConnectableLayer* dequantizeLayer = m_QuantizedNetwork->AddDequantizeLayer(); - RecordLayer(layer, dequantizeLayer); - SetQuantizedInputConnections(layer, dequantizeLayer); - dequantizeLayer->GetOutputSlot(0).Connect(newLayer->GetInputSlot(0)); - dequantizeLayer->GetOutputSlot(0).SetTensorInfo(info); - return; - } - else - { - break; - } - } - case armnn::LayerType::Pad : - { - PadDescriptor padDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddPadLayer(padDescriptor, name); - break; - } - case armnn::LayerType::Permute : - { - PermuteDescriptor permuteDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddPermuteLayer(permuteDescriptor, name); - break; - } - case armnn::LayerType::Pooling2d : - { - Pooling2dDescriptor pooling2dDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddPooling2dLayer(pooling2dDescriptor, name); - break; - } - case armnn::LayerType::Prelu : - { - newLayer = m_QuantizedNetwork->AddPreluLayer(name); - break; - } - case armnn::LayerType::Reshape : - { - 
ReshapeDescriptor reshapeDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddReshapeLayer(reshapeDescriptor, name); - break; - } - case armnn::LayerType::Resize : - { - - ResizeBilinearDescriptor resizeBilinearDescriptor = - static_cast(descriptor); - - ResizeDescriptor resizeDescriptor; - resizeDescriptor.m_Method = ResizeMethod::Bilinear; - resizeDescriptor.m_TargetWidth = resizeBilinearDescriptor.m_TargetWidth; - resizeDescriptor.m_TargetHeight = resizeBilinearDescriptor.m_TargetHeight; - resizeDescriptor.m_DataLayout = resizeBilinearDescriptor.m_DataLayout; - - newLayer = m_QuantizedNetwork->AddResizeLayer(resizeDescriptor, name); - break; - } - case armnn::LayerType::Slice : - { - SliceDescriptor sliceDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddSliceLayer(sliceDescriptor, name); - break; - } - case armnn::LayerType::Softmax : - { - SoftmaxDescriptor softmaxDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddSoftmaxLayer(softmaxDescriptor, name); - break; - } - case armnn::LayerType::SpaceToBatchNd : - { - SpaceToBatchNdDescriptor spaceToBatchNdDescriptor = - static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddSpaceToBatchNdLayer(spaceToBatchNdDescriptor, name); - break; - } - case armnn::LayerType::SpaceToDepth : - { - SpaceToDepthDescriptor spaceToDepthDescriptor = static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddSpaceToDepthLayer(spaceToDepthDescriptor, name); - break; - } - case armnn::LayerType::Splitter : - { - SplitterDescriptor splitterDescriptor = static_cast(descriptor); - newLayer = m_QuantizedNetwork->AddSplitterLayer(splitterDescriptor, name); - break; - } - case armnn::LayerType::Stack : - { - StackDescriptor stackDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddStackLayer(stackDescriptor, name); - break; - } - case armnn::LayerType::StridedSlice : - { - StridedSliceDescriptor stridedSliceDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddStridedSliceLayer(stridedSliceDescriptor, name); - break; - } - case armnn::LayerType::Subtraction : - { - newLayer = m_QuantizedNetwork->AddSubtractionLayer( name); - break; - } - case armnn::LayerType::TransposeConvolution2d : - { - - const armnn::Optional biases = constants.size() == 1 ? 
- armnn::Optional{} : - armnn::Optional(constants[1]); - // quantize weights - std::vector weightsBacking; - ConstTensor qWeights = CreateQuantizedConst(constants[0], weightsBacking); - - // quantize biases - std::vector biasesBacking; - Optional optionalQBiases; - if (biases.has_value()) - { - ConstTensor qBiases = CreateQuantizedBias(layer, qWeights, biases, biasesBacking); - optionalQBiases = Optional(qBiases); - } - - TransposeConvolution2dDescriptor transposeConvolution2dDescriptor = - static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddTransposeConvolution2dLayer(transposeConvolution2dDescriptor, - qWeights, - optionalQBiases, - name); - break; - } - case armnn::LayerType::Transpose : - { - TransposeDescriptor transposeDescriptor = static_cast(descriptor); - - newLayer = m_QuantizedNetwork->AddTransposeLayer(transposeDescriptor, name); - break; - } - default: - { - throw UnimplementedException("Unimplemented layer encountered"); - } - } - RecordLayer(layer, newLayer); - SetQuantizedInputConnections(layer, newLayer); -} - -} - diff --git a/src/armnn/QuantizerStrategy.hpp b/src/armnn/QuantizerStrategy.hpp deleted file mode 100644 index f782959020..0000000000 --- a/src/armnn/QuantizerStrategy.hpp +++ /dev/null @@ -1,63 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "Network.hpp" -#include "NetworkQuantizerUtils.hpp" -#include "StaticRangeStrategy.hpp" - -#include -#include - -namespace armnn -{ -class QuantizerStrategy : public IStrategy -{ -public : - QuantizerStrategy(const RangeTracker& rangeTracker, - const IQuantizationScheme* quantizationScheme, - bool preserveType); - - ~QuantizerStrategy() = default; - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override; - - /// Extract the quantized network - INetworkPtr RetrieveFinalNetwork() { return std::move(m_QuantizedNetwork); } - -private: - /// Connects the layer to preceeding layers and sets the quantization parameters based on recorded ranges - void SetQuantizedInputConnections(const IConnectableLayer* srcLayer, IConnectableLayer* quantizedLayer); - - /// Record the guids so we can easily find the layers later - void RecordLayer(const IConnectableLayer* srcLayer, IConnectableLayer* qLayer); - - /// Sets the bias quantization scale based on input and weight scales - ConstTensor CreateQuantizedBias(const IConnectableLayer* srcLayer, - const ConstTensor& weights, - const Optional& biases, - std::vector& weightsBacking); - - /// Reference to the static range visitor used to retrieve the quantization ranges - const RangeTracker& m_Ranges; - - /// Quantized version of the model we are building up - INetworkPtr m_QuantizedNetwork; - - /// Mapping from input network guids to quantized network guids - std::unordered_map m_OriginalToQuantizedGuidMap; - - /// Mapping from guid to layer in quantized network - std::unordered_map m_QuantizedGuidToLayerMap; - - const IQuantizationScheme* m_QuantizationScheme; - - const bool m_PreserveType; -}; - -} //namespace armnn \ No newline at end of file diff --git a/src/armnn/StaticRangeStrategy.cpp b/src/armnn/StaticRangeStrategy.cpp deleted file mode 100644 index 84b8d24068..0000000000 --- a/src/armnn/StaticRangeStrategy.cpp +++ /dev/null @@ -1,193 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "StaticRangeStrategy.hpp" - -#include -#include -#include - -#include - -namespace armnn -{ - -StaticRangeStrategy::StaticRangeStrategy(RangeTracker& rangeTracker) - : m_RangeTracker(rangeTracker) -{} - -void StaticRangeStrategy::SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max) -{ - m_RangeTracker.SetRange(layer, outputIdx, min, max); -} - -void StaticRangeStrategy::ForwardParentParameters(const IConnectableLayer* layer) -{ - const auto parentRange = m_RangeTracker.GetRange(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), 0); - SetRange(layer, 0, parentRange.first, parentRange.second); -} - - -void StaticRangeStrategy::ExecuteStrategy(const armnn::IConnectableLayer *layer, - const BaseDescriptor &descriptor, - const std::vector &constants, - const char *name, - const armnn::LayerBindingId id) -{ -IgnoreUnused(id, name); - -switch (layer->GetType()) -{ - case armnn::LayerType::Activation : - { - const ActivationDescriptor& activationDescriptor = static_cast(descriptor); - - switch (activationDescriptor.m_Function) - { - // Range is 0, 15 for Abs, Linear, ReLu and Soft ReLu - case ActivationFunction::Abs: - case ActivationFunction::Linear: - case ActivationFunction::ReLu: - case ActivationFunction::SoftReLu: - SetRange(layer, 0, 0.f, 15.f); - break; - case ActivationFunction::BoundedReLu: - SetRange(layer, 0, 0.f, activationDescriptor.m_A); - break; - case ActivationFunction::TanH: - SetRange(layer, 0, -1.f, 1.f); - break; - case ActivationFunction::LeakyReLu: - SetRange(layer, 0, -5.f, 15.f); - break; - default: - SetRange(layer, 0, -15.f, 15.f); - break; - } - break; - } - case armnn::LayerType::Addition : - { - SetRange(layer, 0, -20.f, 20.f); - break; - } - case armnn::LayerType::ArgMinMax : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::BatchToSpaceNd : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::BatchNormalization : - { - SetRange(layer, 0, -15.0f, 15.0f); - break; - } - case armnn::LayerType::Concat : - { - float min = std::numeric_limits::max(); - float max = std::numeric_limits::lowest(); - for (unsigned int i = 0; i < layer->GetNumInputSlots(); ++i) - { - const IOutputSlot* outputSlot = layer->GetInputSlot(i).GetConnection(); - LayerGuid layerId = outputSlot->GetOwningLayerGuid(); - unsigned int slotIndex = outputSlot->CalculateIndexOnOwner(); - RangeTracker::MinMaxRange range = m_RangeTracker.GetRange(layerId, slotIndex); - min = std::min(min, range.first); - max = std::max(max, range.second); - } - SetRange(layer, 0, min, max); - break; - } - case armnn::LayerType::Constant : - { - - if (constants[0].GetDataType() != DataType::Float32) - { - throw InvalidArgumentException("Quantization is supported only for FP32 tensors"); - } - - // Work out the range based on the input constants - unsigned int inputNumElements = constants[0].GetNumElements(); - const float* inputData = reinterpret_cast(constants[0].GetMemoryArea()); - - float min = std::numeric_limits::max(); - float max = std::numeric_limits::lowest(); - - for (unsigned int i = 0; i < inputNumElements; i++) - { - const float inputValue = inputData[i]; - - min = std::min(min, inputValue); - max = std::max(max, inputValue); - } - SetRange(layer, 0, min, max); - break; - } - case armnn::LayerType::Convolution2d : - { - SetRange(layer, 0, -15.0f, 15.0f); - break; - } - case armnn::LayerType::DepthwiseConvolution2d : - { - SetRange(layer, 0, -15.0f, 15.0f); - break; - } - 
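    // A minimal sketch, with assumed container shapes, of what
    // ForwardParentParameters does for the pass-through cases below
    // (Permute, Pooling2d, Reshape, Resize, Splitter, ...): the layer
    // simply reuses the min/max recorded for the output slot feeding it.
    // The map is a stand-in for the RangeTracker; assumes <map>.
    {
        using MinMax = std::pair<float, float>;
        std::map<unsigned int, MinMax> recorded; // guid stand-in -> min/max
        recorded[0] = {-15.0f, 15.0f};           // producer's recorded range
        MinMax inherited = recorded[0];          // pass-through layer's range, unchanged
    }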
case armnn::LayerType::FullyConnected : - { - SetRange(layer, 0, -15.0f, 15.0f); - break; - } - case armnn::LayerType::Permute : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::Pooling2d : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::Reshape : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::Resize : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::Splitter : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::SpaceToBatchNd : - { - ForwardParentParameters(layer); - break; - } - case armnn::LayerType::Softmax : - { - SetRange(layer, 0, 0.f, 1.f); - break; - } - case armnn::LayerType::StridedSlice : - { - ForwardParentParameters(layer); - break; - } - default: - { - } -} -} - -} //namespace armnn diff --git a/src/armnn/StaticRangeStrategy.hpp b/src/armnn/StaticRangeStrategy.hpp deleted file mode 100644 index ed7cf274fe..0000000000 --- a/src/armnn/StaticRangeStrategy.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "armnn/LayerVisitorBase.hpp" -#include "RangeTracker.hpp" - -#include -#include - - -namespace armnn -{ - -class StaticRangeStrategy : public IStrategy -{ -public: - StaticRangeStrategy(RangeTracker& rangeTracker); - ~StaticRangeStrategy() = default; - - void ExecuteStrategy(const armnn::IConnectableLayer *layer, - const BaseDescriptor &descriptor, - const std::vector &constants, - const char *name, - const armnn::LayerBindingId id) override; - -private: - /// Set the range for an output slot on a layer - void SetRange(const IConnectableLayer* layer, unsigned int outputIdx, float min, float max); - - void ForwardParentParameters(const IConnectableLayer* layer); - - /// Mapping from a layer Guid to an array of ranges for outputs - RangeTracker& m_RangeTracker; - -}; - -} //namespace armnn diff --git a/src/armnn/test/QuantizerTest.cpp b/src/armnn/test/QuantizerTest.cpp deleted file mode 100644 index a932698674..0000000000 --- a/src/armnn/test/QuantizerTest.cpp +++ /dev/null @@ -1,2220 +0,0 @@ -// -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#include "../Graph.hpp" -#include "../Network.hpp" -#include "../NetworkQuantizerUtils.hpp" -#include "../OverrideInputRangeVisitor.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include - -namespace armnn -{ -using MinMaxRange = std::pair; -using MinMaxRanges = std::vector; -using MinMaxRangeMap = std::unordered_map; - -const float g_AsymmU8QuantizationBase = 255.0f; -// Coinciding with calcution which for AsymmS8 which calculates scale on an unsigned basis -const float g_AsymmS8QuantizationBase = 255.0f; -const float g_SymmS8QuantizationBase = 127.0f; -const float g_SymmS16QuantizationBase = 32767.0f; -const float g_TestTolerance = 0.000001f; - -class TestConnectionPreservation : public LayerVisitorBase -{ -public: - TestConnectionPreservation(INetwork* network) - : LayerVisitorBase() - , m_Network(network) - {} - - void VisitAdditionLayer(const IConnectableLayer* layer, const char*) override - { - CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1"); - CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2"); - } - - void CheckLayerName(LayerGuid guid, std::string expectedName) - { - auto graph = m_Network->pNetworkImpl->GetGraph(); - bool guidFound = false; - for (Layer* layer : graph) - { - if (layer->GetGuid() == guid) - { - BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str()); - guidFound = true; - break; - } - } - if (!guidFound) - { - BOOST_FAIL("No layer matching the GUID was found"); - } - } - -private: - INetwork* m_Network; -}; - -void VisitLayersTopologically(const INetwork* inputNetwork, IStrategy& visitor) -{ - auto graph = inputNetwork->pNetworkImpl->GetGraph().TopologicalSort(); - - ApplyStrategyToLayers(graph, visitor); -} - -TensorInfo GetInputTensorInfo(const INetwork* network) -{ - for (auto&& inputLayer : network->pNetworkImpl->GetGraph().GetInputLayers()) - { - ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); - return inputLayer->GetOutputSlot(0).GetTensorInfo(); - } - throw InvalidArgumentException("Network has no input layers"); -} - -TensorInfo GetInputTensorInfo(const NetworkImpl* network) -{ - for (auto&& inputLayer : network->GetGraph().GetInputLayers()) - { - ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot"); - return inputLayer->GetOutputSlot(0).GetTensorInfo(); - } - throw InvalidArgumentException("Network has no input layers"); -} - -BOOST_AUTO_TEST_SUITE(Quantizer) - -class TestQuantization : public IStrategy -{ -public: - TestQuantization(const TensorShape &inputShape, const TensorShape &outputShape) - : m_InputShape(inputShape), m_OutputShape(outputShape), m_QuantizerOptions(QuantizerOptions()) - {} - - TestQuantization(const QuantizerOptions& options, const TensorShape& inputShape, const TensorShape& outputShape) - : m_InputShape(inputShape) - , m_OutputShape(outputShape) - , m_QuantizerOptions(options) {} - - void ExecuteStrategy(const armnn::IConnectableLayer *layer, - const BaseDescriptor &descriptor, - const std::vector &constants, - const char *name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(id, name); - - if (layer->GetType() == armnn::LayerType::Output) - { - const TensorInfo &info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - BOOST_TEST(m_OutputShape == info.GetShape()); - return; - } - - const TensorInfo& info = 
layer->GetOutputSlot(0).GetTensorInfo(); - - switch (layer->GetType()) - { - case armnn::LayerType::BatchToSpaceNd : - case armnn::LayerType::Permute : - case armnn::LayerType::Pooling2d : - case armnn::LayerType::Reshape : - case armnn::LayerType::Resize : - case armnn::LayerType::SpaceToBatchNd : - case armnn::LayerType::Splitter : - case armnn::LayerType::StridedSlice : - { - CheckDefaultQuantizationSettings(info); - break; - } - case armnn::LayerType::Addition : - { - - // Based off default static range [-20.0f, 20.0f] - TestQuantizationParams(info, {40.0f / g_AsymmU8QuantizationBase, 128}, - {40.0f / g_AsymmS8QuantizationBase, 0}, - {20.0f / g_SymmS8QuantizationBase, 0}, - {20.0f / g_SymmS16QuantizationBase, 0}); - break; - } - case armnn::LayerType::Activation : - { - const ActivationDescriptor& activationDescriptor = static_cast(descriptor); - - switch (activationDescriptor.m_Function) - { - case ActivationFunction::BoundedReLu : - { - // Based off default static range [0.0f, 3.5f] - TestQuantizationParams(info, {3.5f / g_AsymmU8QuantizationBase, 0}, - {3.5f / g_AsymmS8QuantizationBase, -128}, - {3.5f / g_SymmS8QuantizationBase, 0}, - {3.5f / g_SymmS16QuantizationBase, 0}); - break; - } - case ActivationFunction::Elu : - { - TestQuantizationParams( - info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - break; - } - case ActivationFunction::HardSwish : - { - TestQuantizationParams(info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - break; - } - case ActivationFunction::LeakyReLu : - { - // Based off default static range [-5.0f, 15.0f] - TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, - {20.0f / g_AsymmS8QuantizationBase,-64}, - {15.0f / g_SymmS8QuantizationBase , 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - break; - } - case ActivationFunction::TanH : - { - TestQuantizationParams(info, {2.0f / g_AsymmU8QuantizationBase, 128}, - {2.0f / g_AsymmS8QuantizationBase, 0}, - {1.0f / g_SymmS8QuantizationBase , 0}, - {1.0f / g_SymmS16QuantizationBase, 0}); - break; - } - default: - { - // Based off default static range [0.0f, 15.0f] - TestQuantizationParams(info, {15.0f / g_AsymmU8QuantizationBase, 0}, - {15.0f / g_AsymmS8QuantizationBase, -128}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - break; - } - } - break; - } - case armnn::LayerType::ArgMinMax : - { - const ArgMinMaxDescriptor& argMinMaxDescriptor = static_cast(descriptor); - - if(argMinMaxDescriptor.m_Function == ArgMinMaxFunction::Max) - { - break; - } - TestQuantizationParams(info, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - break; - } - case armnn::LayerType::BatchNormalization : - { - - // Based off default static range [-15.0f, 15.0f] - TestQuantizationParams( - info, {30.0f / g_AsymmU8QuantizationBase, 128}, - {30.0f / g_AsymmS8QuantizationBase, 0}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - - // Test constants - TestConstantQuantizationParams(constants[0].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - TestConstantQuantizationParams(constants[1].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85}); - 
TestConstantQuantizationParams(constants[2].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
-                TestConstantQuantizationParams(constants[3].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
-                break;
-            }
-            case armnn::LayerType::Comparison :
-            {
-
-                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
-                const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };
-
-                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
-
-                break;
-            }
-            case armnn::LayerType::Constant :
-            {
-
-                // Based off the range of values in the const tensor used for the test: [-2.0f, 6.0f]
-                TestQuantizationParams(info, {8.0f / g_AsymmU8QuantizationBase, 64},
-                                             {8.0f / g_AsymmS8QuantizationBase, -64},
-                                             {6.0f / g_SymmS8QuantizationBase, 0},
-                                             {6.0f / g_SymmS16QuantizationBase, 0});
-
-                break;
-            }
-            case armnn::LayerType::Convolution2d :
-            {
-                if (constants.size() == 2)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
-                }
-                else if (constants.size() == 1)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
-                }
-                break;
-            }
-            case armnn::LayerType::DepthwiseConvolution2d :
-            {
-                if (constants.size() == 2)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
-                }
-                else if (constants.size() == 1)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
-                }
-                break;
-            }
-            case armnn::LayerType::DepthToSpace :
-            {
-                const OffsetScalePair qAsymmU8Params{30.0f / g_AsymmU8QuantizationBase, 128};
-                const OffsetScalePair qAsymmS8Params{30.0f / g_AsymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS8Params{15.0f / g_SymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS16Params{15.0f / g_SymmS16QuantizationBase, 0};
-
-                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
-                break;
-            }
-            case armnn::LayerType::FullyConnected :
-            {
-                if (constants.size() == 2)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
-                }
-                else if (constants.size() == 1)
-                {
-                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
-                }
-
-                break;
-            }
-            case armnn::LayerType::Fill :
-            {
-                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
-                const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };
-
-                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
-                break;
-            }
-            case armnn::LayerType::Input :
-            {
-                BOOST_TEST(m_InputShape == info.GetShape());
-                // Based off current default [-15.0f, 15.0f]
-                TestQuantizationParams(info, {30.0f / g_AsymmU8QuantizationBase, 128},
-                                             {30.0f / g_AsymmS8QuantizationBase, 0},
-                                             {15.0f / g_SymmS8QuantizationBase, 0},
-                                             {15.0f / g_SymmS16QuantizationBase, 0});
-                break;
-            }
-            case armnn::LayerType::InstanceNormalization :
-            {
-                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
-                const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0};
-                const OffsetScalePair qSymmS16Params{ 15.0f /
g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - break; - } - case armnn::LayerType::LogSoftmax : - { - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params { 30.0f / g_AsymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0}; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - break; - } - case armnn::LayerType::Slice : - { - const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 }; - const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 }; - const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 }; - const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 }; - - TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params); - break; - } - case armnn::LayerType::Softmax : - { - // Based off default static range [0.0f, 1.0f] - TestQuantizationParams(info, {1.0f / g_AsymmU8QuantizationBase, 0}, - {1.0f / g_AsymmS8QuantizationBase, -128}, - {1.0f / g_SymmS8QuantizationBase, 0}, - {1.0f / g_SymmS16QuantizationBase, 0}); - break; - } - case armnn::LayerType::SpaceToDepth : - { - TestQuantizationParams(info, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0 }, - { 15.0f / g_SymmS8QuantizationBase, 0 }, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - - break; - } - case armnn::LayerType::Stack : - { - TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); - - TestQuantizationParams(outputInfo, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - break; - } - case armnn::LayerType::TransposeConvolution2d : - { - if (constants.size() == 2) - { - TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]); - } - else if (constants.size() == 1) - { - TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional()); - } - break; - } - default: - { - throw UnimplementedException("Unimplemented layer encountered"); - } - } - } - - -protected: - - void CheckDefaultQuantizationSettings(const TensorInfo& info) - { - TestQuantizationParams(info, {20.0f / g_AsymmU8QuantizationBase, 64}, - {20.0f / g_AsymmS8QuantizationBase,-64}, - {15.0f / g_SymmS8QuantizationBase, 0}, - {15.0f / g_SymmS16QuantizationBase, 0}); - } - - void TestQuantizationParams(const TensorInfo& info, - const OffsetScalePair& qAsymmU8Params, - const OffsetScalePair& qAsymmS8Params, - const OffsetScalePair& qSymmS8Params, - const OffsetScalePair& qSymmS16Params) - { - switch (m_QuantizerOptions.m_ActivationFormat) - { - case DataType::QAsymmU8: - TestQuantizationParamsImpl( - info, DataType::QAsymmU8, qAsymmU8Params.first, qAsymmU8Params.second); - break; - case DataType::QAsymmS8: - TestQuantizationParamsImpl( - info, DataType::QAsymmS8, qAsymmS8Params.first, qAsymmS8Params.second); - break; - case DataType::QSymmS8: - TestQuantizationParamsImpl( - info, DataType::QSymmS8, qSymmS8Params.first, qSymmS8Params.second); - break; - case DataType::QSymmS16: - TestQuantizationParamsImpl( - info, DataType::QSymmS16, qSymmS16Params.first, qSymmS16Params.second); - break; - default: - throw InvalidArgumentException("Unsupported 
quantization target"); - } - } - - void TestDifferentQuantizationScale(const TensorInfo& info0, const TensorInfo& info1) - { - BOOST_TEST(info0.GetQuantizationScale() != info1.GetQuantizationScale()); - } - - void TestConstantQuantizationParams(const TensorInfo& info, - const OffsetScalePair& params, - DataType dataType = DataType::QAsymmU8) - { - IgnoreUnused(dataType); - TestQuantizationParamsImpl(info, dataType, params.first, params.second); - } - - void TestBiasQuantizationParams(const TensorInfo& info, - const OffsetScalePair& qAsymmU8Params, - const OffsetScalePair& qAsymmS8Params, - const OffsetScalePair& qSymmS8Params, - const OffsetScalePair& qSymmS16Params, - DataType dataType = DataType::QAsymmU8) - { - switch (m_QuantizerOptions.m_ActivationFormat) - { - case DataType::QAsymmU8: - TestQuantizationParamsImpl(info, dataType, qAsymmU8Params.first, qAsymmU8Params.second); - break; - case DataType::QAsymmS8: - TestQuantizationParamsImpl(info, dataType, qAsymmS8Params.first, qAsymmS8Params.second); - break; - case DataType::QSymmS8: - TestQuantizationParamsImpl(info, dataType, qSymmS8Params.first, qSymmS8Params.second); - break; - case DataType::QSymmS16: - TestQuantizationParamsImpl(info, dataType, qSymmS16Params.first, qSymmS16Params.second); - break; - default: - throw InvalidArgumentException("Unsupported quantization target"); - } - } - - void TestQuantizationOnLayersWithBiases(const IConnectableLayer* layer, - const ConstTensor& weights, - const Optional& biases) - { - TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo(); - float inputScaleQAsymmU8 = 30.0f / g_AsymmU8QuantizationBase; - float inputScaleQAsymmS8 = 30.0f / g_AsymmS8QuantizationBase; - float inputScaleQSymmS8 = 15.0f / g_SymmS8QuantizationBase; - float inputScaleQSymmS16 = 15.0f / g_SymmS16QuantizationBase; - float weightsScale = 3.0f / g_AsymmU8QuantizationBase; - - // Based off default static range [-15.0f, 15.0f] - TestQuantizationParams(info, {inputScaleQAsymmU8, 128}, - {inputScaleQAsymmS8, 0}, - {inputScaleQSymmS8, 0}, - {inputScaleQSymmS16, 0}); - - TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85}); - - if (biases.has_value()) - { - TestBiasQuantizationParams(biases.value().GetInfo(), - {inputScaleQAsymmU8 * weightsScale, 0}, - {inputScaleQAsymmS8 * weightsScale, 0}, - {inputScaleQSymmS8 * weightsScale, 0}, - {inputScaleQSymmS16 * weightsScale, 0}, - DataType::Signed32); - } - } - - TensorShape m_InputShape; - TensorShape m_OutputShape; - -private: - void TestQuantizationParamsImpl(const TensorInfo& info, DataType dataType, float scale, int32_t offset) - { - BOOST_TEST((info.GetDataType() == dataType)); - BOOST_TEST(info.GetQuantizationOffset() == offset); - BOOST_CHECK_CLOSE(info.GetQuantizationScale(), scale, g_TestTolerance); - } - - QuantizerOptions m_QuantizerOptions; -}; - -void TestNetwork(INetwork* network, const TensorShape inShape, const TensorShape outShape) -{ - const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8); - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network, qAsymmU8Options)->ExportNetwork(); - TestQuantization validatorQAsymmU8(inShape, outShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network, qAsymmS8Options)->ExportNetwork(); - TestQuantization validatorQAsymmS8(qAsymmS8Options, inShape, outShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), 
validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network, qSymmS8Options)->ExportNetwork(); - TestQuantization validatorQSymmS8(qSymmS8Options, inShape, outShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network, qSymmS16options)->ExportNetwork(); - TestQuantization validatorQSymmS16(qSymmS16options, inShape, outShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); -} - -void TestNetwork(INetwork* network, const TensorShape shape) -{ - TestNetwork(network, shape, shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeAddition) -{ - INetworkPtr network = INetwork::Create(); - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* input1 = network->AddInputLayer(1); - IConnectableLayer* addition = network->AddAdditionLayer(); - IConnectableLayer* output = network->AddOutputLayer(2); - - // Establish connections - input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0)); - input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1)); - addition->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - input0->GetOutputSlot(0).SetTensorInfo(info); - input1->GetOutputSlot(0).SetTensorInfo(info); - addition->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -INetworkPtr CreateNetworkWithActivationLayer(const ActivationDescriptor& descriptor, const TensorShape& shape) -{ - INetworkPtr network = INetwork::Create(); - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* activation = network->AddActivationLayer(descriptor); - IConnectableLayer* output = network->AddOutputLayer(2); - - // Establish connections - input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); - activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - TensorInfo info(shape, DataType::Float32); - input0->GetOutputSlot(0).SetTensorInfo(info); - activation->GetOutputSlot(0).SetTensorInfo(info); - - return network; -} - -INetworkPtr CreateNetworkWithArgMinMaxLayer(const ArgMinMaxDescriptor& descriptor, const TensorShape& shape) -{ - INetworkPtr network = INetwork::Create(); - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* activation = network->AddArgMinMaxLayer(descriptor); - IConnectableLayer* output = network->AddOutputLayer(2); - - // Establish connections - input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); - activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - TensorInfo inInfo(shape, DataType::Float32); - input0->GetOutputSlot(0).SetTensorInfo(inInfo); - TensorInfo outInfo({1}, DataType::Signed32); - activation->GetOutputSlot(0).SetTensorInfo(outInfo); - - return network; -} - -INetworkPtr CreateNetworkWithInputOutputLayers() -{ - INetworkPtr network = INetwork::Create(); - - // Add input/output layers - IConnectableLayer* inputLayer = network->AddInputLayer(0); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - inputLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - TensorShape shape{8U}; - TensorInfo info(shape, 
DataType::Float32); - inputLayer->GetOutputSlot(0).SetTensorInfo(info); - - return network; -} - -BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant) -{ - INetworkPtr network = CreateNetworkWithInputOutputLayers(); - - armnn::TensorInfo tensorInfo = GetInputTensorInfo(network.get()); - - // Outliers -56 and 98 - std::vector inputData({0, 0, 0, -56, 98, 0, 0, 0}); - armnn::ConstTensor inputTensor(tensorInfo, inputData.data()); - - InputTensors inputTensors; - inputTensors.push_back(std::make_pair(0, inputTensor)); - - armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get()); - - quantizer->Refine(inputTensors); - - // Outliers -77 and 65 - std::vector inputData2({0, -77, 0, -56, 65, 0, 0, 0}); - armnn::ConstTensor inputTensor2(tensorInfo, inputData2.data()); - InputTensors inputTensors2; - inputTensors2.push_back(std::make_pair(0, inputTensor2)); - - quantizer->Refine(inputTensors2); - - INetworkPtr quantizedNetwork = quantizer->ExportNetwork(); - // Output Layer should be quantized for a min max of -77 and 98 - // according to QU8 Quantization Scheme - std::unique_ptr quantizationScheme = std::make_unique(); - OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0); - -class TestOutputStrategy : public IStrategy -{ - public : - TestOutputStrategy(const OffsetScalePair& offsetScalePair, const DataType& dataType) : - m_OffsetScalePair(offsetScalePair), m_DataType(dataType) {} - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(name, constants, id, descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Output : - { - const TensorInfo &info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType, - std::string(armnn::GetDataTypeName(info.GetDataType())) - .append(" == ").append(armnn::GetDataTypeName(m_DataType))); - // int_32t - BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second); - // float - BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first, - boost::test_tools::tolerance(0.001)); - break; - } - default: - {} - } - } - -private: - const OffsetScalePair m_OffsetScalePair; - const DataType m_DataType; -}; - - TestOutputStrategy strategy(qParams, quantizationScheme->GetDataType()); - quantizedNetwork->ExecuteStrategy(strategy); -} - -BOOST_AUTO_TEST_CASE(QuantizeAbsActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::Abs; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeArgMax) -{ - ArgMinMaxDescriptor descriptor; - descriptor.m_Function = ArgMinMaxFunction::Max; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithArgMinMaxLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeLinearActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::Linear; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeReLuActivation) -{ - ActivationDescriptor descriptor; - 
descriptor.m_Function = ActivationFunction::ReLu; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeSoftReLuActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::SoftReLu; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::BoundedReLu; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeTanHActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::TanH; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeLeakyReLuActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::LeakyReLu; - descriptor.m_A = 3.5f; - descriptor.m_B = -10.0f; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - - -BOOST_AUTO_TEST_CASE(QuantizeELuActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::Elu; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} -BOOST_AUTO_TEST_CASE(QuantizeHardSwishActivation) -{ - ActivationDescriptor descriptor; - descriptor.m_Function = ActivationFunction::HardSwish; - - const TensorShape shape{1U}; - INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape); - - TestNetwork(network.get(), shape); -} - - -BOOST_AUTO_TEST_CASE(QuantizeBatchNorm) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{3U}; - TensorInfo info(shape, DataType::Float32); - - std::vector meanData{-1.0f, 1.5f, 2.0f}; - std::vector varData{-1.0f, 1.5f, 2.0f}; - std::vector betaData{-1.0f, 1.5f, 2.0f}; - std::vector gammaData{-1.0f, 1.5f, 2.0f}; - - ConstTensor mean(info, meanData); - ConstTensor var(info, varData); - ConstTensor beta(info, betaData); - ConstTensor gamma(info, gammaData); - - BatchNormalizationDescriptor desc; - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* batchNorm = network->AddBatchNormalizationLayer(desc, mean, var, beta, gamma); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input0->GetOutputSlot(0).Connect(batchNorm->GetInputSlot(0)); - batchNorm->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - input0->GetOutputSlot(0).SetTensorInfo(info); - batchNorm->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace) -{ - const TensorShape inputShape { 1, 2, 2, 4 }; - const TensorShape outputShape{ 1, 4, 4, 1 }; - - const TensorInfo inputInfo (inputShape, DataType::Float32); - const TensorInfo 
outputInfo(outputShape, DataType::Float32); - - INetworkPtr network = INetwork::Create(); - const DepthToSpaceDescriptor descriptor(2, armnn::DataLayout::NHWC); - - IConnectableLayer* inputLayer = network->AddInputLayer(0); - IConnectableLayer* depthToSpaceLayer = network->AddDepthToSpaceLayer(descriptor); - IConnectableLayer* outputLayer = network->AddOutputLayer(0); - - inputLayer->GetOutputSlot(0).Connect(depthToSpaceLayer->GetInputSlot(0)); - depthToSpaceLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo); - depthToSpaceLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - - TestNetwork(network.get(), inputShape, outputShape); -} - -BOOST_AUTO_TEST_CASE(OverrideInputRangeEmptyNetwork) -{ - RangeTracker ranges; - RangeTracker::MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override - - NetworkImpl network; // Empty network - auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers - - OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange); - ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy); - - BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched -} - -BOOST_AUTO_TEST_CASE(OverrideInputRangeNoInputLayers) -{ - RangeTracker ranges; - MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override - - NetworkImpl network; - network.AddAdditionLayer(); // Network with no input layers - auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers - - OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange); - ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy); - - BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched -} - -BOOST_AUTO_TEST_CASE(OverrideInputRangeInputLayers) -{ - RangeTracker ranges; - MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override - - NetworkImpl network; - - // Adding the layers - IConnectableLayer* input0 = network.AddInputLayer(0); - IConnectableLayer* input1 = network.AddInputLayer(1); - IConnectableLayer* addition = network.AddAdditionLayer(); - IConnectableLayer* output = network.AddOutputLayer(2); - - // Connecting the layer - input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0)); - input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1)); - addition->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Setting the TensorInfos - TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - input0->GetOutputSlot(0).SetTensorInfo(info); - input1->GetOutputSlot(0).SetTensorInfo(info); - addition->GetOutputSlot(0).SetTensorInfo(info); - - auto inputLayers = network.GetGraph().GetInputLayers(); // List of input layers - - // Trying to override the input range for the input layer with binding id 3 (does not exist in the network) - OverrideInputRangeStrategy overrideInputRangeStrategy3(ranges, 3, minMaxRange); - ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy3); - - // Check that the map of ranges remained untouched - BOOST_CHECK(ranges.IsEmpty()); - - // Override the input range for the input layer with binding id 1 - OverrideInputRangeStrategy overrideInputRangeStrategy1(ranges, 1, minMaxRange); - ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy1); - - // Check that the map of ranges has been populated - BOOST_CHECK(!ranges.IsEmpty()); - - // Check that an entry for the input layer with binding id 0 does not exist - 
BOOST_CHECK(!ranges.HasRanges(input0->GetGuid())); - - // Check that an entry for the input layer with binding id 1 exists - BOOST_CHECK(ranges.HasRanges(input1->GetGuid())); - - // Check the the overridden values are what we intended to set - BOOST_CHECK(ranges.GetRange(input1->GetGuid(), 0) == minMaxRange); -} - -INetworkPtr CreateNetworkWithFullyConnectedLayer(const bool biasEnabled, - const TensorShape& inputShape, - const TensorShape& outputShape) -{ - FullyConnectedDescriptor desc; - desc.m_BiasEnabled = biasEnabled; - INetworkPtr network = INetwork::Create(); - - const TensorInfo info(inputShape, DataType::Float32); - const TensorInfo outputInfo(outputShape, DataType::Float32); - - std::vector weightsData{-1.0f, 1.5f, 2.0f}; - ConstTensor weights(info, weightsData); - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* fullyConnected; - Optional optionalBias; - std::vector biasData{10.0f, 20.0f, 30.0f}; - if (desc.m_BiasEnabled) - { - ConstTensor bias(info, biasData); - optionalBias = Optional(bias); - } - fullyConnected = network->AddFullyConnectedLayer(desc, weights, optionalBias); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input0->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0)); - fullyConnected->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - input0->GetOutputSlot(0).SetTensorInfo(info); - fullyConnected->GetOutputSlot(0).SetTensorInfo(outputInfo); - - return network; -} - -void ValidateFullyConnectedLayer(const bool biasEnabled) -{ - const TensorShape shape{3U}; - INetworkPtr network = CreateNetworkWithFullyConnectedLayer(biasEnabled, shape, shape); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeFill) -{ - const TensorShape tensorShape{ 1U }; - const TensorInfo tensorInfo(tensorShape, DataType::Float32); - - INetworkPtr network = INetwork::Create(); - - FillDescriptor descriptor; - descriptor.m_Value = 1; - - IConnectableLayer* inputLayer = network->AddInputLayer(0); - IConnectableLayer* fillLayer = network->AddFillLayer(descriptor); - IConnectableLayer* outputLayer = network->AddOutputLayer(0); - - inputLayer->GetOutputSlot(0).Connect(fillLayer->GetInputSlot(0)); - fillLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - fillLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - TestNetwork(network.get(), tensorShape); -} - -BOOST_AUTO_TEST_CASE(QuantizeFullyConnected) -{ - ValidateFullyConnectedLayer(false); -} - -BOOST_AUTO_TEST_CASE(QuantizeFullyConnectedBiasEnabled) -{ - ValidateFullyConnectedLayer(true); -} - -void TestQuantizeConvolution2d(bool useBiases) -{ - INetworkPtr network = INetwork::Create(); - - TensorShape shape{3U}; - TensorInfo info(shape, DataType::Float32); - - std::vector weightsData{-1.0f, 1.5f, 2.0f}; - ConstTensor weights(info, weightsData); - - Convolution2dDescriptor descriptor; - descriptor.m_BiasEnabled = useBiases; - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* conv2d; - Optional optionalBiases; - std::vector biasesData{-1.0f, 1.5f, 2.0f}; - if (useBiases) - { - ConstTensor biases(info, biasesData); - optionalBiases = Optional(biases); - } - conv2d = network->AddConvolution2dLayer(descriptor, weights, optionalBiases); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input0->GetOutputSlot(0).Connect(conv2d->GetInputSlot(0)); - 
conv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - input0->GetOutputSlot(0).SetTensorInfo(info); - conv2d->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeConvolution2d) -{ - TestQuantizeConvolution2d(false); -} - -BOOST_AUTO_TEST_CASE(QuantizeConvolution2dWithBiases) -{ - TestQuantizeConvolution2d(true); -} - -void TestQuantizeDepthwiseConvolution2d(bool useBiases) -{ - INetworkPtr network = INetwork::Create(); - - TensorShape shape{3U}; - TensorInfo info(shape, DataType::Float32); - - std::vector weightsData{-1.0f, 1.5f, 2.0f}; - ConstTensor weights(info, weightsData); - - DepthwiseConvolution2dDescriptor descriptor; - descriptor.m_BiasEnabled = useBiases; - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* depthwiseConv2d; - Optional optionalBiases; - std::vector biasesData{-1.0f, 1.5f, 2.0f}; - if (useBiases) - { - ConstTensor biases(info, biasesData); - optionalBiases = Optional(biases); - } - depthwiseConv2d = network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBiases); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input0->GetOutputSlot(0).Connect(depthwiseConv2d->GetInputSlot(0)); - depthwiseConv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - //Set TensorInfo - input0->GetOutputSlot(0).SetTensorInfo(info); - depthwiseConv2d->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2d) -{ - TestQuantizeDepthwiseConvolution2d(false); -} - -BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2dWithBiases) -{ - TestQuantizeDepthwiseConvolution2d(true); -} - -BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization) -{ - const TensorShape shape{ 1, 4, 4, 1 }; - const TensorInfo tensorInfo(shape, DataType::Float32); - - INetworkPtr network = INetwork::Create(); - - IConnectableLayer* inputLayer = network->AddInputLayer(0); - IConnectableLayer* instanceNormLayer = network->AddInstanceNormalizationLayer(InstanceNormalizationDescriptor()); - IConnectableLayer* outputLayer = network->AddOutputLayer(0); - - inputLayer->GetOutputSlot(0).Connect(instanceNormLayer->GetInputSlot(0)); - instanceNormLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - instanceNormLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax) -{ - const TensorShape tensorShape{ 1U }; - const TensorInfo tensorInfo(tensorShape, DataType::Float32); - - INetworkPtr network = INetwork::Create(); - - LogSoftmaxDescriptor descriptor; - descriptor.m_Beta = 1.0f; - - IConnectableLayer* inputLayer = network->AddInputLayer(0); - IConnectableLayer* logSoftmaxLayer = network->AddLogSoftmaxLayer(descriptor); - IConnectableLayer* outputLayer = network->AddOutputLayer(0); - - inputLayer->GetOutputSlot(0).Connect(logSoftmaxLayer->GetInputSlot(0)); - logSoftmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - logSoftmaxLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - TestNetwork(network.get(), tensorShape); -} - -INetworkPtr CreateNetworkWithSoftmaxLayer(const SoftmaxDescriptor& descriptor, const TensorShape& shape) -{ - INetworkPtr network = INetwork::Create(); - - // Add the layers - IConnectableLayer* input0 = 
network->AddInputLayer(0);
-    IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
-    IConnectableLayer* output = network->AddOutputLayer(2);
-
-    // Establish connections
-    input0->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
-    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    // Set TensorInfo
-    TensorInfo info(shape, DataType::Float32);
-    input0->GetOutputSlot(0).SetTensorInfo(info);
-    softmax->GetOutputSlot(0).SetTensorInfo(info);
-
-    return network;
-}
-
-BOOST_AUTO_TEST_CASE(QuantizeSoftmax)
-{
-    SoftmaxDescriptor descriptor;
-    descriptor.m_Beta = 1.0f;
-
-    const TensorShape shape{1U};
-    INetworkPtr network = CreateNetworkWithSoftmaxLayer(descriptor, shape);
-
-    TestNetwork(network.get(), shape);
-}
-
-BOOST_AUTO_TEST_CASE(QuantizeStandIn)
-{
-    const TensorShape tensorShape{ 1U };
-    const TensorInfo tensorInfo(tensorShape, DataType::Float32);
-
-    INetworkPtr network = INetwork::Create();
-
-    StandInDescriptor descriptor;
-    descriptor.m_NumInputs = 1;
-    descriptor.m_NumOutputs = 1;
-
-    IConnectableLayer* inputLayer = network->AddInputLayer(0);
-    IConnectableLayer* standInLayer = network->AddStandInLayer(descriptor);
-    IConnectableLayer* outputLayer = network->AddOutputLayer(0);
-
-    inputLayer->GetOutputSlot(0).Connect(standInLayer->GetInputSlot(0));
-    standInLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
-    standInLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
-
-    // test QAsymmU8 quantization
-    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get())->ExportNetwork(),
-                      armnn::UnimplementedException);
-
-    // test QAsymmS8 quantization
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(),
-                      armnn::UnimplementedException);
-
-    // test QSymmS8 quantization
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(),
-                      armnn::UnimplementedException);
-
-    // test QSymmS16 quantization
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(),
-                      armnn::UnimplementedException);
-}
-
-IConnectableLayer* CreateStartOfLeakyReluNetwork(INetwork* network, const TensorInfo& info)
-{
-    ActivationDescriptor activationDescriptor;
-    activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
-    activationDescriptor.m_A = 3.5f;
-    activationDescriptor.m_B = -10.0f;
-
-    // Add the layers
-    IConnectableLayer* input0 = network->AddInputLayer(0);
-    IConnectableLayer* activation = network->AddActivationLayer(activationDescriptor);
-
-    // Establish connections
-    input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
-
-    // Set TensorInfo
-    input0->GetOutputSlot(0).SetTensorInfo(info);
-    activation->GetOutputSlot(0).SetTensorInfo(info);
-
-    return activation;
-}
-
-void CompleteLeakyReluNetwork(INetwork* network,
-                              IConnectableLayer* activation,
-                              IConnectableLayer* layerUnderTest,
-                              const TensorInfo& info)
-{
-    // Add the output layer
-    IConnectableLayer* output = network->AddOutputLayer(3);
-
-    // Establish connections
-    activation->GetOutputSlot(0).Connect(layerUnderTest->GetInputSlot(0));
-    layerUnderTest->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    // Set TensorInfo
-    layerUnderTest->GetOutputSlot(0).SetTensorInfo(info);
-}
-
-BOOST_AUTO_TEST_CASE(QuantizePermute)
-{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - PermuteDescriptor desc; - IConnectableLayer* permute = network->AddPermuteLayer(desc); - - CompleteLeakyReluNetwork(network.get(), activation, permute, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeSpaceToBatch) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - SpaceToBatchNdDescriptor descriptor; - IConnectableLayer* spaceToBatch = network->AddSpaceToBatchNdLayer(descriptor); - - CompleteLeakyReluNetwork(network.get(), activation, spaceToBatch, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{ 1u }; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - IConnectableLayer* spaceToDepth = network->AddSpaceToDepthLayer(SpaceToDepthDescriptor()); - - CompleteLeakyReluNetwork(network.get(), activation, spaceToDepth, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizePooling2d) -{ - auto network = INetwork::Create(); - - TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - Pooling2dDescriptor desc; - ActivationDescriptor activationDescriptor; - activationDescriptor.m_Function = ActivationFunction::LeakyReLu; - activationDescriptor.m_A = 3.5f; - activationDescriptor.m_B = -10.0f; - - // Add the layers - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* activation = network->AddActivationLayer(activationDescriptor); - IConnectableLayer* pooling2d = network->AddPooling2dLayer(desc); - IConnectableLayer* output = network->AddOutputLayer(3); - - // Establish connections - input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); - activation->GetOutputSlot(0).Connect(pooling2d->GetInputSlot(0)); - pooling2d->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo - input0->GetOutputSlot(0).SetTensorInfo(info); - activation->GetOutputSlot(0).SetTensorInfo(info); - pooling2d->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeConstant) -{ - INetworkPtr network = INetwork::Create(); - - // Constant layer data - std::vector data = {-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; - const TensorShape shape{1U, 1U, 3U, 3U}; - TensorInfo tensorInfo(shape, DataType::Float32); - ConstTensor constantTensor(tensorInfo, data); - - // Add the layers - IConnectableLayer* input = network->AddInputLayer(0); - IConnectableLayer* constant = network->AddConstantLayer(constantTensor); - IConnectableLayer* addition = network->AddAdditionLayer(); - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input->GetOutputSlot(0).Connect(addition->GetInputSlot(0)); - constant->GetOutputSlot(0).Connect(addition->GetInputSlot(1)); - addition->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set TensorInfo in the remaining layers - input->GetOutputSlot(0).SetTensorInfo(tensorInfo); - addition->GetOutputSlot(0).SetTensorInfo(tensorInfo); - 
constant->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeArgMinMax) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape inputShape{ 1, 1, 1, 5 }; - const TensorShape outputShape{ 1, 1, 1 }; - - TensorInfo inputInfo(inputShape, DataType::Float32); - TensorInfo outputInfo(outputShape, DataType::Float32); - - // Add the input layers - IConnectableLayer* input = network->AddInputLayer(0); - - // Add the layer under test - ArgMinMaxDescriptor argMinMaxDescriptor; - argMinMaxDescriptor.m_Function = ArgMinMaxFunction::Max; - IConnectableLayer* argMinMaxLayer = network->AddArgMinMaxLayer(argMinMaxDescriptor); - - // Add the output layers - IConnectableLayer* output = network->AddOutputLayer(1); - - // Establish connections - input->GetOutputSlot(0).Connect(argMinMaxLayer->GetInputSlot(0)); - argMinMaxLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set tensor info - input->GetOutputSlot(0).SetTensorInfo(inputInfo); - argMinMaxLayer->GetOutputSlot(0).SetTensorInfo(outputInfo); - - TestNetwork(network.get(), inputShape, outputShape); -} - -BOOST_AUTO_TEST_CASE(QuantizeComparison) -{ - const TensorShape tensorShape{ 1u }; - const TensorInfo tensorInfo(tensorShape, DataType::Float32); - - INetworkPtr network = INetwork::Create(); - ComparisonDescriptor descriptor(ComparisonOperation::LessOrEqual); - - IConnectableLayer* inputLayer0 = network->AddInputLayer(0); - IConnectableLayer* inputLayer1 = network->AddInputLayer(1); - IConnectableLayer* comparisonLayer = network->AddComparisonLayer(descriptor); - IConnectableLayer* outputLayer = network->AddOutputLayer(0); - - inputLayer0->GetOutputSlot(0).Connect(comparisonLayer->GetInputSlot(0)); - inputLayer1->GetOutputSlot(0).Connect(comparisonLayer->GetInputSlot(1)); - comparisonLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0)); - - inputLayer0->GetOutputSlot(0).SetTensorInfo(tensorInfo); - inputLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo); - comparisonLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo); - - TestNetwork(network.get(), tensorShape); -} - -BOOST_AUTO_TEST_CASE(QuantizeConcat) -{ - class TestConcatQuantization : public TestQuantization - { - public: - TestConcatQuantization(const TensorShape& inputShape, const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestConcatQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(name, constants, id, descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Input : - break; - case armnn::LayerType::Output : - break; - case armnn::LayerType::Concat : - { - TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); - TestQuantizationParams( - outputInfo, {60.8f / g_AsymmU8QuantizationBase, 65}, - {60.8f / g_SymmS8QuantizationBase, -63}, - {45.3f / g_SymmS8QuantizationBase, 0}, - {45.3f / g_SymmS16QuantizationBase, 0}); - - TensorInfo inputInfo0 = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - TensorInfo inputInfo1 = layer->GetInputSlot(1).GetConnection()->GetTensorInfo(); - TensorInfo inputInfo2 = layer->GetInputSlot(2).GetConnection()->GetTensorInfo(); - - TestDifferentQuantizationScale(inputInfo0, 
inputInfo1); - TestDifferentQuantizationScale(inputInfo0, inputInfo2); - TestDifferentQuantizationScale(inputInfo1, inputInfo2); - TestDifferentQuantizationScale(inputInfo0, outputInfo); - break; - } - default: - {} - } - } - }; - - INetworkPtr network = INetwork::Create(); - - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* input1 = network->AddInputLayer(1); - IConnectableLayer* input2 = network->AddInputLayer(2); - - OriginsDescriptor descriptor(3, 1); - IConnectableLayer* concatLayer = network->AddConcatLayer(descriptor); - - IConnectableLayer* output0 = network->AddOutputLayer(3); - - // Establish connections - input0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0)); - input1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1)); - input2->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(2)); - concatLayer->GetOutputSlot(0).Connect(output0->GetInputSlot(0)); - - // Set TensorInfo - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - input0->GetOutputSlot(0).SetTensorInfo(info); - input1->GetOutputSlot(0).SetTensorInfo(info); - input2->GetOutputSlot(0).SetTensorInfo(info); - concatLayer->GetOutputSlot(0).SetTensorInfo(info); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkQuantizerPtr quantizerPtrQAsymmU8 = INetworkQuantizer::Create(network.get()); - INetworkQuantizerPtr quantizerPtrQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options); - INetworkQuantizerPtr quantizerPtrQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options); - // Override the input ranges - float min = -15.5f; - float max = 45.3f; - - quantizerPtrQAsymmU8->OverrideInputRange(0, (min + 2.1f), (max - 3.2f)); - quantizerPtrQAsymmU8->OverrideInputRange(1, (min + 6.7f), max); - quantizerPtrQAsymmU8->OverrideInputRange(2, min, (max - 7.8f)); - - quantizerPtrQSymmS8->OverrideInputRange(0, (min + 2.1f), (max - 3.2f)); - quantizerPtrQSymmS8->OverrideInputRange(1, (min + 6.7f), max); - quantizerPtrQSymmS8->OverrideInputRange(2, min, (max - 7.8f)); - - quantizerPtrQSymmS16->OverrideInputRange(0, (min + 2.1f), (max - 3.2f)); - quantizerPtrQSymmS16->OverrideInputRange(1, (min + 6.7f), max); - quantizerPtrQSymmS16->OverrideInputRange(2, min, (max - 7.8f)); - - INetworkPtr quantizedNetworkQAsymmU8 = quantizerPtrQAsymmU8->ExportNetwork(); - TestConcatQuantization validatorQAsymmU8(shape, shape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - INetworkPtr quantizedNetworkQSymmS8 = quantizerPtrQSymmS8->ExportNetwork(); - TestConcatQuantization validatorQSymmS8(qSymmS8Options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - INetworkPtr quantizedNetworkQSymmS16 = quantizerPtrQSymmS16->ExportNetwork(); - TestConcatQuantization validatorQSymmS16(qSymmS16options, shape, shape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); -} - -BOOST_AUTO_TEST_CASE(QuantizeReshape) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - ReshapeDescriptor descriptor({1, 2, 3, 4}); - IConnectableLayer* reshape = network->AddReshapeLayer(descriptor); - - CompleteLeakyReluNetwork(network.get(), activation, reshape, info); - - TestNetwork(network.get(), shape); -} - 
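For reference: the expected (scale, offset) pairs asserted throughout this file follow the usual asymmetric affine mapping over each layer's tracked static range. A minimal sketch of that arithmetic, assuming scale = (max - min) / 255 and offset = round(-min / scale); the helper name is illustrative, not the armnn QAsymmU8 scheme implementation itself:

#include <cmath>
#include <cstdint>
#include <utility>

// Sketch only: reproduces the QAsymmU8 expectations used in these tests,
// e.g. [-15.0f, 15.0f] -> {30.0f / 255.0f, 128} and [-5.0f, 15.0f] -> {20.0f / 255.0f, 64}.
std::pair<float, int32_t> ComputeQAsymmU8Params(float min, float max)
{
    const float scale = (max - min) / 255.0f;
    const int32_t offset = static_cast<int32_t>(std::round(-min / scale));
    return { scale, offset };
}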
-BOOST_AUTO_TEST_CASE(QuantizeSplitter) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{3U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - ViewsDescriptor splitterDesc(2,4); - IConnectableLayer* splitter = network->AddSplitterLayer(splitterDesc); - CompleteLeakyReluNetwork(network.get(), activation, splitter, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeResize) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - ResizeDescriptor descriptor; - descriptor.m_TargetHeight = 3; - descriptor.m_TargetWidth = 3; - IConnectableLayer* resizeLayer = network->AddResizeLayer(descriptor); - - CompleteLeakyReluNetwork(network.get(), activation, resizeLayer, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeStridedSlice) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{3U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - StridedSliceDescriptor stridedSliceDesc; - IConnectableLayer* stridedSlice = network->AddStridedSliceLayer(stridedSliceDesc); - - CompleteLeakyReluNetwork(network.get(), activation, stridedSlice, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeBatchToSpace) -{ - INetworkPtr network = INetwork::Create(); - - const TensorShape shape{1U}; - TensorInfo info(shape, DataType::Float32); - - IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); - - // Add the layer under test - BatchToSpaceNdDescriptor descriptor; - IConnectableLayer* batchToSpace = network->AddBatchToSpaceNdLayer(descriptor); - - CompleteLeakyReluNetwork(network.get(), activation, batchToSpace, info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizePrelu) -{ - class TestPreluQuantization : public TestQuantization - { - public: - TestPreluQuantization(const TensorShape& inputShape, - const TensorShape& alphaShape, - const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) - , m_AlphaShape(alphaShape) - {} - - TestPreluQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& alphaShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) - , m_AlphaShape(alphaShape) - {} - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(name, constants, id, descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Input : - { - const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); - - switch (id) - { - case 0: // Input - BOOST_TEST(m_InputShape == info.GetShape()); - break; - case 1: // Alpha - BOOST_TEST(m_AlphaShape == info.GetShape()); - break; - default: - throw InvalidArgumentException("Invalid layer binding id for PReLU layer"); - } - - // Based off current default [-15.0f, 15.0f] - TestQuantizationParams(info, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8 - { 30.0f / g_AsymmS8QuantizationBase, 0}, // QASymmS8 - 
{ 15.0f / g_SymmS8QuantizationBase, 0}, // QSymmS8 - { 15.0f / g_SymmS16QuantizationBase, 0 }); // QSymmS16 - break; - } - case armnn::LayerType::Output : - { - const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); - BOOST_TEST(m_OutputShape == info.GetShape()); - break; - } - case armnn::LayerType::Prelu : - { - const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); - TestQuantizationParams(info, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8 - { 30.0f / g_AsymmS8QuantizationBase, 0}, // QAsymmS8 - { 15.0f / g_SymmS8QuantizationBase, 0}, // QSymmS8 - { 15.0f / g_SymmS16QuantizationBase, 0 }); // QSymmS16 - break; - } - default: - {} - } - } - - private: - TensorShape m_AlphaShape; - }; - - INetworkPtr network = INetwork::Create(); - - const TensorShape inputShape{ 4, 1, 2 }; - const TensorShape alphaShape{ 5, 4, 3, 1 }; - const TensorShape outputShape{ 5, 4, 3, 2 }; - TensorInfo inputInfo(inputShape, DataType::Float32); - TensorInfo alphaInfo(alphaShape, DataType::Float32); - TensorInfo outputInfo(outputShape, DataType::Float32); - - // Add the input layers - IConnectableLayer* input = network->AddInputLayer(0); - IConnectableLayer* alpha = network->AddInputLayer(1); - - // Add the layer under test - IConnectableLayer* prelu = network->AddPreluLayer("prelu"); - - // Add the output layers - IConnectableLayer* output = network->AddOutputLayer(0); - - // Establish connections - input->GetOutputSlot(0).Connect(prelu->GetInputSlot(0)); - alpha->GetOutputSlot(0).Connect(prelu->GetInputSlot(1)); - prelu->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - // Set tensor info - input->GetOutputSlot(0).SetTensorInfo(inputInfo); - alpha->GetOutputSlot(0).SetTensorInfo(alphaInfo); - prelu->GetOutputSlot(0).SetTensorInfo(outputInfo); - - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestPreluQuantization validatorQAsymmU8(inputShape, alphaShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestPreluQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, alphaShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestPreluQuantization validatorQSymmS8(qSymmS8Options, inputShape, alphaShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); - - const QuantizerOptions qSymmS16options(DataType::QSymmS16); - INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); - TestPreluQuantization validatorQSymmS16(qSymmS16options, inputShape, alphaShape, outputShape); - VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); -} - -void TestQuantizeTransposeConvolution2d(bool useBiases) -{ - INetworkPtr network = INetwork::Create(); - - TensorShape shape{ 3 }; - TensorInfo info(shape, DataType::Float32); - - std::initializer_list floatData{ -1.0f, 1.5f, 2.0f }; - std::vector weightsData(floatData); - ConstTensor weights(info, weightsData); - - TransposeConvolution2dDescriptor descriptor; - descriptor.m_BiasEnabled = useBiases; - - // construct 
network - IConnectableLayer* input = network->AddInputLayer(0); - Optional optionalBiases; - std::vector biasesData(floatData); - if (useBiases) - { - ConstTensor biases(info, biasesData); - optionalBiases = Optional(biases); - } - IConnectableLayer* transposeConv2d = network->AddTransposeConvolution2dLayer(descriptor, weights, optionalBiases); - IConnectableLayer* output = network->AddOutputLayer(1); - - input->GetOutputSlot(0).Connect(transposeConv2d->GetInputSlot(0)); - transposeConv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - input->GetOutputSlot(0).SetTensorInfo(info); - transposeConv2d->GetOutputSlot(0).SetTensorInfo(info); - - TestNetwork(network.get(), shape); -} - -BOOST_AUTO_TEST_CASE(QuantizeTransposeConvolution2d) -{ - TestQuantizeTransposeConvolution2d(false); -} - -BOOST_AUTO_TEST_CASE(QuantizeTransposeConvolution2dWithBiases) -{ - TestQuantizeTransposeConvolution2d(true); -} - -BOOST_AUTO_TEST_CASE(QuantizeStack) -{ - class TestStackQuantization : public TestQuantization - { - public: - TestStackQuantization(const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(inputShape, outputShape) {} - - TestStackQuantization(const QuantizerOptions& options, - const TensorShape& inputShape, - const TensorShape& outputShape) - : TestQuantization(options, inputShape, outputShape) {} - - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id) override - { - IgnoreUnused(name, constants, id, descriptor); - - switch (layer->GetType()) - { - case armnn::LayerType::Input : - { - break; - } - case armnn::LayerType::Output : - { - break; - } - case armnn::LayerType::Stack : - { - TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); - - TestQuantizationParams(outputInfo, - { 30.0f / g_AsymmU8QuantizationBase, 128 }, - { 30.0f / g_AsymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS8QuantizationBase, 0}, - { 15.0f / g_SymmS16QuantizationBase, 0 }); - break; - } - default: - {} - } - } - }; - - INetworkPtr network = INetwork::Create(); - - IConnectableLayer* input0 = network->AddInputLayer(0); - IConnectableLayer* input1 = network->AddInputLayer(1); - - const TensorShape inputShape{ 3, 4, 5 }; - const TensorShape outputShape{ 3, 4, 2, 5 }; - - StackDescriptor descriptor(2, 2, inputShape); - IConnectableLayer* stackLayer = network->AddStackLayer(descriptor); - - IConnectableLayer* output = network->AddOutputLayer(0); - - input0->GetOutputSlot(0).Connect(stackLayer->GetInputSlot(0)); - input1->GetOutputSlot(0).Connect(stackLayer->GetInputSlot(1)); - stackLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); - - INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); - TestStackQuantization validatorQAsymmU8(inputShape, outputShape); - VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); - - const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); - INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); - TestStackQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, inputShape); - VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); - - const QuantizerOptions qSymmS8Options(DataType::QSymmS8); - INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); - TestStackQuantization validatorQSymmS8(qSymmS8Options, 
-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork();
-    TestStackQuantization validatorQAsymmU8(inputShape, outputShape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-
-    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
-    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork();
-    TestStackQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, outputShape);
-    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);
-
-    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
-    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork();
-    TestStackQuantization validatorQSymmS8(qSymmS8Options, inputShape, outputShape);
-    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);
-
-    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
-    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
-    TestStackQuantization validatorQSymmS16(qSymmS16options, inputShape, outputShape);
-    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
-}
-
-BOOST_AUTO_TEST_CASE(QuantizeSlice)
-{
-    TensorShape shape{ 3 };
-    TensorInfo info(shape, DataType::Float32);
-
-    INetworkPtr network = INetwork::Create();
-
-    IConnectableLayer* inputLayer = network->AddInputLayer(0);
-    IConnectableLayer* sliceLayer = network->AddSliceLayer(SliceDescriptor());
-    IConnectableLayer* outputLayer = network->AddOutputLayer(0);
-
-    inputLayer->GetOutputSlot(0).Connect(sliceLayer->GetInputSlot(0));
-    sliceLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    inputLayer->GetOutputSlot(0).SetTensorInfo(info);
-    sliceLayer->GetOutputSlot(0).SetTensorInfo(info);
-
-    TestNetwork(network.get(), shape);
-}
-
-std::vector<uint8_t> SetupQuantize(float value)
-{
-    armnn::TensorInfo inputInfo({ 1, 2, 2 }, armnn::DataType::Float32);
-    inputInfo.SetQuantizationScale(1.0f);
-    inputInfo.SetQuantizationOffset(1);
-    std::vector<float> input({ value, 0.0f, 0.0f, 1.0f });
-    const std::vector<float>& inputRef = input;
-
-    auto output = armnnUtils::QuantizedVector<uint8_t>(inputRef,
-                                                       inputInfo.GetQuantizationScale(),
-                                                       inputInfo.GetQuantizationOffset());
-
-    return output;
-}
-
-BOOST_AUTO_TEST_CASE(QuantizeInf)
-{
-    BOOST_CHECK_EQUAL(SetupQuantize(std::numeric_limits<float>::infinity())[0], 255);
-}
-
-BOOST_AUTO_TEST_CASE(QuantizeNegativeInf)
-{
-    BOOST_CHECK_EQUAL(SetupQuantize(-1 * std::numeric_limits<float>::infinity())[0], 0);
-}
-
-class TestPreserveType : public TestQuantization
-{
-public:
-    TestPreserveType(const QuantizerOptions& options,
-                     const DataType& dataType,
-                     const TensorShape& inputShape,
-                     const TensorShape& outputShape)
-        : TestQuantization(options, inputShape, outputShape)
-        , m_DataType(dataType)
-        , m_VisitedQuantizeLayer(false)
-        , m_VisitedDequantizeLayer(false) {}
-
-    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
-                         const BaseDescriptor& descriptor,
-                         const std::vector<armnn::ConstTensor>& constants,
-                         const char* name,
-                         const armnn::LayerBindingId id) override
-    {
-        IgnoreUnused(name, constants, id, descriptor);
-
-        switch (layer->GetType())
-        {
-            case armnn::LayerType::Input :
-            {
-                const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
-                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
-                BOOST_TEST(m_InputShape == info.GetShape());
-                break;
-            }
-            case armnn::LayerType::Output :
-            {
-                const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
-                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
-                BOOST_TEST(m_OutputShape == info.GetShape());
-                break;
-            }
-            case armnn::LayerType::Quantize :
-            {
-                m_VisitedQuantizeLayer = true;
-                break;
-            }
-            case armnn::LayerType::Dequantize :
-            {
-                m_VisitedDequantizeLayer = true;
-                break;
-            }
-            default:
-            {}
-        }
-    }
-
-    void CheckQuantizeDequantizeLayerVisited(bool expected)
-    {
-        if (expected)
-        {
-            BOOST_CHECK(m_VisitedQuantizeLayer);
-            BOOST_CHECK(m_VisitedDequantizeLayer);
-        }
-        else
-        {
-            BOOST_CHECK(!m_VisitedQuantizeLayer);
-            BOOST_CHECK(!m_VisitedDequantizeLayer);
-        }
-    }
-private:
-    const DataType m_DataType;
-    bool m_VisitedQuantizeLayer;
-    bool m_VisitedDequantizeLayer;
-};
-
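-// Shared implementation for the PreserveType* cases below: builds a two-input
-// addition network in the given data type, quantizes it with preserveType
-// enabled, and checks whether Quantize/Dequantize layers were inserted.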
-void PreserveTypeTestImpl(const DataType& dataType)
-{
-    INetworkPtr network = INetwork::Create();
-
-    // Add the layers
-    IConnectableLayer* input0 = network->AddInputLayer(0);
-    IConnectableLayer* input1 = network->AddInputLayer(1);
-    IConnectableLayer* addition = network->AddAdditionLayer();
-    IConnectableLayer* output = network->AddOutputLayer(2);
-
-    input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0));
-    input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1));
-    addition->GetOutputSlot(0).Connect(output->GetInputSlot(0));
-
-    const TensorShape shape{1U, 2U, 3U};
-    const TensorInfo info(shape, dataType);
-    input0->GetOutputSlot(0).SetTensorInfo(info);
-    input1->GetOutputSlot(0).SetTensorInfo(info);
-    addition->GetOutputSlot(0).SetTensorInfo(info);
-
-    QuantizerOptions options = dataType == DataType::Float32 ?
-                               QuantizerOptions(DataType::QAsymmU8, true) : QuantizerOptions(dataType, true);
-
-    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
-    TestPreserveType validatorQAsymmU8(options, dataType, shape, shape);
-    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
-    validatorQAsymmU8.CheckQuantizeDequantizeLayerVisited(
-        dataType == DataType::Float32 || dataType == DataType::Float16);
-}
-
-BOOST_AUTO_TEST_CASE(PreserveTypeFloat32)
-{
-    PreserveTypeTestImpl(DataType::Float32);
-}
-
-BOOST_AUTO_TEST_CASE(PreserveTypeQAsymmU8)
-{
-    PreserveTypeTestImpl(DataType::QAsymmU8);
-}
-
-BOOST_AUTO_TEST_CASE(PreserveTypeQsymm8)
-{
-    PreserveTypeTestImpl(DataType::QSymmS8);
-}
-
-BOOST_AUTO_TEST_CASE(PreserveTypeQsymm16)
-{
-    PreserveTypeTestImpl(DataType::QSymmS16);
-}
-
-BOOST_AUTO_TEST_CASE(TestConnectionPreservationAfterDynamicQuant)
-{
-    class TestConnectionPreservation : public IStrategy
-    {
-    public:
-        TestConnectionPreservation(const Graph& graph)
-            : m_Graph(graph)
-        {}
-
-        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
-                             const BaseDescriptor& descriptor,
-                             const std::vector<armnn::ConstTensor>& constants,
-                             const char* name,
-                             const armnn::LayerBindingId id) override
-        {
-            IgnoreUnused(name, constants, id, descriptor);
-
-            switch (layer->GetType())
-            {
-                case armnn::LayerType::Addition :
-                {
-                    CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1");
-                    CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2");
-                    break;
-                }
-                default:
-                {}
-            }
-        }
-
-        void CheckLayerName(LayerGuid guid, std::string expectedName)
-        {
-            bool guidFound = false;
-            for (Layer* layer : m_Graph)
-            {
-                if (layer->GetGuid() == guid)
-                {
-                    BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str());
-                    guidFound = true;
-                    break;
-                }
-            }
-            if (!guidFound)
-            {
-                BOOST_FAIL("No layer matching the GUID was found");
-            }
-        }
-    private:
-        Graph m_Graph;
-    };
-
-    class TestNetwork : public INetwork
-    {
-    public:
-        NetworkImpl* GetPNetworkImpl()
-        {
-            return pNetworkImpl.get();
-        }
-    };
-
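-    // Build inputLayer -> reLU1 -> reLU2, with addLayer1 taking reLU1 and
-    // reLU2 as its parents; strategy1 and strategy2 (run before and after
-    // quantization) verify that those parent connections are preserved.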
-    TestNetwork testNetwork;
-
-    IConnectableLayer* inputLayer = testNetwork.AddInputLayer(0, "inputLayer1");
-    armnn::ActivationDescriptor ReLUDesc;
-    ReLUDesc.m_Function = ActivationFunction::ReLu;
-
-    IConnectableLayer* reLULayer1 = testNetwork.AddActivationLayer(ReLUDesc, "reLU1");
-    IConnectableLayer* reLULayer2 = testNetwork.AddActivationLayer(ReLUDesc, "reLU2");
-    IConnectableLayer* addLayer1 = testNetwork.AddAdditionLayer("addLayer1");
-    IConnectableLayer* outputLayer = testNetwork.AddOutputLayer(0, "outputLayer1");
-
-    inputLayer->GetOutputSlot(0).Connect(reLULayer1->GetInputSlot(0));
-    reLULayer1->GetOutputSlot(0).Connect(reLULayer2->GetInputSlot(0));
-    reLULayer1->GetOutputSlot(0).Connect(addLayer1->GetInputSlot(0));
-    reLULayer2->GetOutputSlot(0).Connect(addLayer1->GetInputSlot(1));
-    addLayer1->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
-
-    inputLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
-    reLULayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
-    reLULayer2->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
-    addLayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
-
-    TestConnectionPreservation strategy1(testNetwork.GetPNetworkImpl()->GetGraph());
-    VisitLayersTopologically(&testNetwork, strategy1);
-
-    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(&testNetwork);
-
-    armnn::TensorInfo tensorInfo = GetInputTensorInfo(&testNetwork);
-
-    std::vector<float> inputData({0, 2, 0, 4});
-    armnn::ConstTensor inputTensor(tensorInfo, inputData.data());
-
-    InputTensors inputTensors;
-    inputTensors.push_back(std::make_pair(0, inputTensor));
-    quantizer->Refine(inputTensors);
-
-    INetworkPtr quantNetwork = quantizer->ExportNetwork();
-
-    TestNetwork* testQuantNetwork = static_cast<TestNetwork*>(quantNetwork.get());
-
-    TestConnectionPreservation strategy2(testQuantNetwork->GetPNetworkImpl()->GetGraph());
-    VisitLayersTopologically(quantNetwork.get(), strategy2);
-}
-
-BOOST_AUTO_TEST_SUITE_END()
-} // namespace armnn
--
cgit v1.2.1