//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "../Graph.hpp"
#include "../Network.hpp"
#include "../NetworkQuantizerUtils.hpp"
#include "../OverrideInputRangeVisitor.hpp"

#include <armnn/INetwork.hpp>
#include <armnn/LayerVisitorBase.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <armnn/utility/IgnoreUnused.hpp>
#include <armnnQuantizer/INetworkQuantizer.hpp>

#include <QuantizeHelper.hpp>

#include <boost/test/unit_test.hpp>

#include <unordered_map>

namespace armnn
{
using MinMaxRange    = std::pair<float, float>;
using MinMaxRanges   = std::vector<MinMaxRange>;
using MinMaxRangeMap = std::unordered_map<LayerGuid, MinMaxRanges>;

const float g_AsymmU8QuantizationBase = 255.0f;
// Coinciding with the calculation for AsymmS8, which computes the scale on an unsigned 8-bit basis
const float g_AsymmS8QuantizationBase = 255.0f;
const float g_SymmS8QuantizationBase  = 127.0f;
const float g_SymmS16QuantizationBase = 32767.0f;
const float g_TestTolerance           = 0.000001f;
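// Throughout these tests, expected (scale, offset) pairs follow the usual asymmetric
// mapping of a float range [min, max]: scale = (max - min) / quantizationBase and
// offset = round(-min / scale). The helper below is a small illustrative sketch of
// that rule (it is not part of the quantizer's API, and it assumes round-half-up);
// e.g. the default static range [-15.0f, 15.0f] yields {30.0f / 255.0f, 128} for QAsymmU8.
inline OffsetScalePair MakeExpectedAsymmParams(float min, float max, float quantizationBase)
{
    const float scale = (max - min) / quantizationBase;
    const int offset  = static_cast<int>((-min / scale) + 0.5f); // round half up
    return OffsetScalePair(scale, offset);
}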
class TestConnectionPreservation : public LayerVisitorBase<VisitorNoThrowPolicy>
{
public:
    TestConnectionPreservation(INetwork* network)
        : LayerVisitorBase<VisitorNoThrowPolicy>()
        , m_Network(network)
    {}

    void VisitAdditionLayer(const IConnectableLayer* layer, const char*) override
    {
        CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1");
        CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2");
    }

    void CheckLayerName(LayerGuid guid, std::string expectedName)
    {
        auto graph = m_Network->pNetworkImpl->GetGraph();
        bool guidFound = false;
        for (Layer* layer : graph)
        {
            if (layer->GetGuid() == guid)
            {
                BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str());
                guidFound = true;
                break;
            }
        }
        if (!guidFound)
        {
            BOOST_FAIL("No layer matching the GUID was found");
        }
    }

private:
    INetwork* m_Network;
};

void VisitLayersTopologically(const INetwork* inputNetwork, IStrategy& visitor)
{
    auto graph = inputNetwork->pNetworkImpl->GetGraph().TopologicalSort();
    ApplyStrategyToLayers(graph, visitor);
}

TensorInfo GetInputTensorInfo(const INetwork* network)
{
    for (auto&& inputLayer : network->pNetworkImpl->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        return inputLayer->GetOutputSlot(0).GetTensorInfo();
    }
    throw InvalidArgumentException("Network has no input layers");
}

TensorInfo GetInputTensorInfo(const NetworkImpl* network)
{
    for (auto&& inputLayer : network->GetGraph().GetInputLayers())
    {
        ARMNN_ASSERT_MSG(inputLayer->GetNumOutputSlots() == 1, "Input layer should have exactly 1 output slot");
        return inputLayer->GetOutputSlot(0).GetTensorInfo();
    }
    throw InvalidArgumentException("Network has no input layers");
}

BOOST_AUTO_TEST_SUITE(Quantizer)

class TestQuantization : public IStrategy
{
public:
    TestQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
        : m_InputShape(inputShape)
        , m_OutputShape(outputShape)
        , m_QuantizerOptions(QuantizerOptions())
    {}

    TestQuantization(const QuantizerOptions& options, const TensorShape& inputShape, const TensorShape& outputShape)
        : m_InputShape(inputShape)
        , m_OutputShape(outputShape)
        , m_QuantizerOptions(options)
    {}

    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                         const BaseDescriptor& descriptor,
                         const std::vector<armnn::ConstTensor>& constants,
                         const char* name,
                         const armnn::LayerBindingId id) override
    {
        IgnoreUnused(id, name);

        if (layer->GetType() == armnn::LayerType::Output)
        {
            const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
            BOOST_TEST(m_OutputShape == info.GetShape());
            return;
        }

        const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
        switch (layer->GetType())
        {
            case armnn::LayerType::BatchToSpaceNd :
            case armnn::LayerType::Permute :
            case armnn::LayerType::Pooling2d :
            case armnn::LayerType::Reshape :
            case armnn::LayerType::Resize :
            case armnn::LayerType::SpaceToBatchNd :
            case armnn::LayerType::Splitter :
            case armnn::LayerType::StridedSlice :
            {
                CheckDefaultQuantizationSettings(info);
                break;
            }
            case armnn::LayerType::Addition :
            {
                // Based off default static range [-20.0f, 20.0f]
                TestQuantizationParams(info,
                                       {40.0f / g_AsymmU8QuantizationBase, 128},
                                       {40.0f / g_AsymmS8QuantizationBase, 0},
                                       {20.0f / g_SymmS8QuantizationBase, 0},
                                       {20.0f / g_SymmS16QuantizationBase, 0});
                break;
            }
            case armnn::LayerType::Activation :
            {
                const ActivationDescriptor& activationDescriptor =
                    static_cast<const ActivationDescriptor&>(descriptor);
                switch (activationDescriptor.m_Function)
                {
                    case ActivationFunction::BoundedReLu :
                    {
                        // Based off default static range [0.0f, 3.5f]
                        TestQuantizationParams(info,
                                               {3.5f / g_AsymmU8QuantizationBase, 0},
                                               {3.5f / g_AsymmS8QuantizationBase, -128},
                                               {3.5f / g_SymmS8QuantizationBase, 0},
                                               {3.5f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                    case ActivationFunction::Elu :
                    {
                        TestQuantizationParams(info,
                                               {30.0f / g_AsymmU8QuantizationBase, 128},
                                               {30.0f / g_AsymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                    case ActivationFunction::HardSwish :
                    {
                        TestQuantizationParams(info,
                                               {30.0f / g_AsymmU8QuantizationBase, 128},
                                               {30.0f / g_AsymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                    case ActivationFunction::LeakyReLu :
                    {
                        // Based off default static range [-5.0f, 15.0f]
                        TestQuantizationParams(info,
                                               {20.0f / g_AsymmU8QuantizationBase, 64},
                                               {20.0f / g_AsymmS8QuantizationBase, -64},
                                               {15.0f / g_SymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                    case ActivationFunction::TanH :
                    {
                        TestQuantizationParams(info,
                                               {2.0f / g_AsymmU8QuantizationBase, 128},
                                               {2.0f / g_AsymmS8QuantizationBase, 0},
                                               {1.0f / g_SymmS8QuantizationBase, 0},
                                               {1.0f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                    default:
                    {
                        // Based off default static range [0.0f, 15.0f]
                        TestQuantizationParams(info,
                                               {15.0f / g_AsymmU8QuantizationBase, 0},
                                               {15.0f / g_AsymmS8QuantizationBase, -128},
                                               {15.0f / g_SymmS8QuantizationBase, 0},
                                               {15.0f / g_SymmS16QuantizationBase, 0});
                        break;
                    }
                }
                break;
            }
            case armnn::LayerType::ArgMinMax :
            {
                const ArgMinMaxDescriptor& argMinMaxDescriptor =
                    static_cast<const ArgMinMaxDescriptor&>(descriptor);
                if (argMinMaxDescriptor.m_Function == ArgMinMaxFunction::Max)
                {
                    break;
                }
                TestQuantizationParams(info,
                                       { 30.0f / g_AsymmU8QuantizationBase, 128 },
                                       { 30.0f / g_AsymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS16QuantizationBase, 0 });
                break;
            }
            case armnn::LayerType::BatchNormalization :
            {
                // Based off default static range [-15.0f, 15.0f]
                TestQuantizationParams(info,
                                       {30.0f / g_AsymmU8QuantizationBase, 128},
                                       {30.0f / g_AsymmS8QuantizationBase, 0},
                                       {15.0f / g_SymmS8QuantizationBase, 0},
                                       {15.0f / g_SymmS16QuantizationBase, 0});

                // Test constants
                TestConstantQuantizationParams(constants[0].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
                TestConstantQuantizationParams(constants[1].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
                TestConstantQuantizationParams(constants[2].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
                TestConstantQuantizationParams(constants[3].GetInfo(), {3.0f / g_AsymmU8QuantizationBase, 85});
                break;
            }
            case armnn::LayerType::Comparison :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::Constant :
            {
                // Based off the range of values in the const tensor used for the test: [-2.0f, 6.0f]
                TestQuantizationParams(info,
                                       {8.0f / g_AsymmU8QuantizationBase, 64},
                                       {8.0f / g_AsymmS8QuantizationBase, -64},
                                       {6.0f / g_SymmS8QuantizationBase, 0},
                                       {6.0f / g_SymmS16QuantizationBase, 0});
                break;
            }
            case armnn::LayerType::Convolution2d :
            {
                if (constants.size() == 2)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
                }
                else if (constants.size() == 1)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
                }
                break;
            }
            case armnn::LayerType::DepthwiseConvolution2d :
            {
                if (constants.size() == 2)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
                }
                else if (constants.size() == 1)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
                }
                break;
            }
            case armnn::LayerType::DepthToSpace :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::FullyConnected :
            {
                if (constants.size() == 2)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
                }
                else if (constants.size() == 1)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
                }
                break;
            }
            case armnn::LayerType::Fill :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::Input :
            {
                BOOST_TEST(m_InputShape == info.GetShape());
                // Based off current default [-15.0f, 15.0f]
                TestQuantizationParams(info,
                                       {30.0f / g_AsymmU8QuantizationBase, 128},
                                       {30.0f / g_AsymmS8QuantizationBase, 0},
                                       {15.0f / g_SymmS8QuantizationBase, 0},
                                       {15.0f / g_SymmS16QuantizationBase, 0});
                break;
            }
            case armnn::LayerType::InstanceNormalization :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::LogSoftmax :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::Slice :
            {
                const OffsetScalePair qAsymmU8Params{ 30.0f / g_AsymmU8QuantizationBase, 128 };
                const OffsetScalePair qAsymmS8Params{ 30.0f / g_AsymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS8Params { 15.0f / g_SymmS8QuantizationBase, 0 };
                const OffsetScalePair qSymmS16Params{ 15.0f / g_SymmS16QuantizationBase, 0 };

                TestQuantizationParams(info, qAsymmU8Params, qAsymmS8Params, qSymmS8Params, qSymmS16Params);
                break;
            }
            case armnn::LayerType::Softmax :
            {
                // Based off default static range [0.0f, 1.0f]
                TestQuantizationParams(info,
                                       {1.0f / g_AsymmU8QuantizationBase, 0},
                                       {1.0f / g_AsymmS8QuantizationBase, -128},
                                       {1.0f / g_SymmS8QuantizationBase, 0},
                                       {1.0f / g_SymmS16QuantizationBase, 0});
                break;
            }
            case armnn::LayerType::SpaceToDepth :
            {
                TestQuantizationParams(info,
                                       { 30.0f / g_AsymmU8QuantizationBase, 128 },
                                       { 30.0f / g_AsymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS16QuantizationBase, 0 });
                break;
            }
            case armnn::LayerType::Stack :
            {
                TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo();
                TestQuantizationParams(outputInfo,
                                       { 30.0f / g_AsymmU8QuantizationBase, 128 },
                                       { 30.0f / g_AsymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS8QuantizationBase, 0 },
                                       { 15.0f / g_SymmS16QuantizationBase, 0 });
                break;
            }
            case armnn::LayerType::TransposeConvolution2d :
            {
                if (constants.size() == 2)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], constants[1]);
                }
                else if (constants.size() == 1)
                {
                    TestQuantizationOnLayersWithBiases(layer, constants[0], armnn::EmptyOptional());
                }
                break;
            }
            default:
            {
                throw UnimplementedException("Unimplemented layer encountered");
            }
        }
    }

protected:
    void CheckDefaultQuantizationSettings(const TensorInfo& info)
    {
        TestQuantizationParams(info,
                               {20.0f / g_AsymmU8QuantizationBase, 64},
                               {20.0f / g_AsymmS8QuantizationBase, -64},
                               {15.0f / g_SymmS8QuantizationBase, 0},
                               {15.0f / g_SymmS16QuantizationBase, 0});
    }

    void TestQuantizationParams(const TensorInfo& info,
                                const OffsetScalePair& qAsymmU8Params,
                                const OffsetScalePair& qAsymmS8Params,
                                const OffsetScalePair& qSymmS8Params,
                                const OffsetScalePair& qSymmS16Params)
    {
        switch (m_QuantizerOptions.m_ActivationFormat)
        {
            case DataType::QAsymmU8:
                TestQuantizationParamsImpl(info, DataType::QAsymmU8, qAsymmU8Params.first, qAsymmU8Params.second);
                break;
            case DataType::QAsymmS8:
                TestQuantizationParamsImpl(info, DataType::QAsymmS8, qAsymmS8Params.first, qAsymmS8Params.second);
                break;
            case DataType::QSymmS8:
                TestQuantizationParamsImpl(info, DataType::QSymmS8, qSymmS8Params.first, qSymmS8Params.second);
                break;
            case DataType::QSymmS16:
                TestQuantizationParamsImpl(info, DataType::QSymmS16, qSymmS16Params.first, qSymmS16Params.second);
                break;
            default:
                throw InvalidArgumentException("Unsupported quantization target");
        }
    }

    void TestDifferentQuantizationScale(const TensorInfo& info0, const TensorInfo& info1)
    {
        BOOST_TEST(info0.GetQuantizationScale() != info1.GetQuantizationScale());
    }

    void TestConstantQuantizationParams(const TensorInfo& info,
                                        const OffsetScalePair& params,
                                        DataType dataType = DataType::QAsymmU8)
    {
        IgnoreUnused(dataType);
        TestQuantizationParamsImpl(info, dataType, params.first, params.second);
    }

    void TestBiasQuantizationParams(const TensorInfo& info,
                                    const OffsetScalePair& qAsymmU8Params,
                                    const OffsetScalePair& qAsymmS8Params,
                                    const OffsetScalePair& qSymmS8Params,
                                    const OffsetScalePair& qSymmS16Params,
                                    DataType dataType = DataType::QAsymmU8)
    {
        switch (m_QuantizerOptions.m_ActivationFormat)
        {
            case DataType::QAsymmU8:
                TestQuantizationParamsImpl(info, dataType, qAsymmU8Params.first, qAsymmU8Params.second);
                break;
            case DataType::QAsymmS8:
                TestQuantizationParamsImpl(info, dataType, qAsymmS8Params.first, qAsymmS8Params.second);
                break;
            case DataType::QSymmS8:
                TestQuantizationParamsImpl(info, dataType, qSymmS8Params.first, qSymmS8Params.second);
                break;
            case DataType::QSymmS16:
                TestQuantizationParamsImpl(info, dataType, qSymmS16Params.first, qSymmS16Params.second);
                break;
            default:
                throw InvalidArgumentException("Unsupported quantization target");
        }
    }
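    // Note on the bias expectations encoded below: following the common convention
    // (and what TestQuantizationOnLayersWithBiases checks), a bias tensor is expected
    // to be quantized as Signed32 with scale = inputScale * weightsScale and offset 0,
    // so no separate range needs to be tracked for it.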
    void TestQuantizationOnLayersWithBiases(const IConnectableLayer* layer,
                                            const ConstTensor& weights,
                                            const Optional<ConstTensor>& biases)
    {
        TensorInfo info = layer->GetOutputSlot(0).GetTensorInfo();
        float inputScaleQAsymmU8 = 30.0f / g_AsymmU8QuantizationBase;
        float inputScaleQAsymmS8 = 30.0f / g_AsymmS8QuantizationBase;
        float inputScaleQSymmS8  = 15.0f / g_SymmS8QuantizationBase;
        float inputScaleQSymmS16 = 15.0f / g_SymmS16QuantizationBase;
        float weightsScale       = 3.0f / g_AsymmU8QuantizationBase;

        // Based off default static range [-15.0f, 15.0f]
        TestQuantizationParams(info,
                               {inputScaleQAsymmU8, 128},
                               {inputScaleQAsymmS8, 0},
                               {inputScaleQSymmS8, 0},
                               {inputScaleQSymmS16, 0});

        TestConstantQuantizationParams(weights.GetInfo(), {weightsScale, 85});

        if (biases.has_value())
        {
            TestBiasQuantizationParams(biases.value().GetInfo(),
                                       {inputScaleQAsymmU8 * weightsScale, 0},
                                       {inputScaleQAsymmS8 * weightsScale, 0},
                                       {inputScaleQSymmS8 * weightsScale, 0},
                                       {inputScaleQSymmS16 * weightsScale, 0},
                                       DataType::Signed32);
        }
    }

    TensorShape m_InputShape;
    TensorShape m_OutputShape;

private:
    void TestQuantizationParamsImpl(const TensorInfo& info, DataType dataType, float scale, int32_t offset)
    {
        BOOST_TEST((info.GetDataType() == dataType));
        BOOST_TEST(info.GetQuantizationOffset() == offset);
        BOOST_CHECK_CLOSE(info.GetQuantizationScale(), scale, g_TestTolerance);
    }

    QuantizerOptions m_QuantizerOptions;
};

void TestNetwork(INetwork* network, const TensorShape inShape, const TensorShape outShape)
{
    const QuantizerOptions qAsymmU8Options(DataType::QAsymmU8);
    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network, qAsymmU8Options)->ExportNetwork();
    TestQuantization validatorQAsymmU8(inShape, outShape);
    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);

    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
    INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network, qAsymmS8Options)->ExportNetwork();
    TestQuantization validatorQAsymmS8(qAsymmS8Options, inShape, outShape);
    VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8);

    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
    INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network, qSymmS8Options)->ExportNetwork();
    TestQuantization validatorQSymmS8(qSymmS8Options, inShape, outShape);
    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);

    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network, qSymmS16options)->ExportNetwork();
    TestQuantization validatorQSymmS16(qSymmS16options, inShape, outShape);
    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
}

void TestNetwork(INetwork* network, const TensorShape shape)
{
    TestNetwork(network, shape, shape);
}
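// Most test cases below delegate to TestNetwork, which quantizes the same float
// network once per supported target type and walks the result with a
// TestQuantization validator, e.g.:
//
//     INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);
//     TestNetwork(network.get(), shape); // checks QAsymmU8, QAsymmS8, QSymmS8 and QSymmS16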
BOOST_AUTO_TEST_CASE(QuantizeAddition)
{
    INetworkPtr network = INetwork::Create();

    // Add the layers
    IConnectableLayer* input0   = network->AddInputLayer(0);
    IConnectableLayer* input1   = network->AddInputLayer(1);
    IConnectableLayer* addition = network->AddAdditionLayer();
    IConnectableLayer* output   = network->AddOutputLayer(2);

    // Establish connections
    input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1));
    addition->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    const TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    addition->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

INetworkPtr CreateNetworkWithActivationLayer(const ActivationDescriptor& descriptor, const TensorShape& shape)
{
    INetworkPtr network = INetwork::Create();

    // Add the layers
    IConnectableLayer* input0     = network->AddInputLayer(0);
    IConnectableLayer* activation = network->AddActivationLayer(descriptor);
    IConnectableLayer* output     = network->AddOutputLayer(2);

    // Establish connections
    input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    TensorInfo info(shape, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    activation->GetOutputSlot(0).SetTensorInfo(info);

    return network;
}

INetworkPtr CreateNetworkWithArgMinMaxLayer(const ArgMinMaxDescriptor& descriptor, const TensorShape& shape)
{
    INetworkPtr network = INetwork::Create();

    // Add the layers
    IConnectableLayer* input0     = network->AddInputLayer(0);
    IConnectableLayer* activation = network->AddArgMinMaxLayer(descriptor);
    IConnectableLayer* output     = network->AddOutputLayer(2);

    // Establish connections
    input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    TensorInfo inInfo(shape, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(inInfo);
    TensorInfo outInfo({1}, DataType::Signed32);
    activation->GetOutputSlot(0).SetTensorInfo(outInfo);

    return network;
}

INetworkPtr CreateNetworkWithInputOutputLayers()
{
    INetworkPtr network = INetwork::Create();

    // Add input/output layers
    IConnectableLayer* inputLayer = network->AddInputLayer(0);
    IConnectableLayer* output     = network->AddOutputLayer(1);

    // Establish connections
    inputLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    TensorShape shape{8U};
    TensorInfo info(shape, DataType::Float32);
    inputLayer->GetOutputSlot(0).SetTensorInfo(info);

    return network;
}

BOOST_AUTO_TEST_CASE(InputOutputLayerDynamicQuant)
{
    INetworkPtr network = CreateNetworkWithInputOutputLayers();

    armnn::TensorInfo tensorInfo = GetInputTensorInfo(network.get());

    // Outliers -56 and 98
    std::vector<float> inputData({0, 0, 0, -56, 98, 0, 0, 0});
    armnn::ConstTensor inputTensor(tensorInfo, inputData.data());

    InputTensors inputTensors;
    inputTensors.push_back(std::make_pair(0, inputTensor));

    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(network.get());

    quantizer->Refine(inputTensors);

    // Outliers -77 and 65
    std::vector<float> inputData2({0, -77, 0, -56, 65, 0, 0, 0});
    armnn::ConstTensor inputTensor2(tensorInfo, inputData2.data());
    InputTensors inputTensors2;
    inputTensors2.push_back(std::make_pair(0, inputTensor2));

    quantizer->Refine(inputTensors2);

    INetworkPtr quantizedNetwork = quantizer->ExportNetwork();

    // Output Layer should be quantized for a min max of -77 and 98
    // according to QU8 Quantization Scheme
    std::unique_ptr<IQuantizationScheme> quantizationScheme = std::make_unique<QAsymmU8QuantizationScheme>();
    OffsetScalePair qParams = quantizationScheme->ComputeScheme(-77.0, 98.0);
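    // The two Refine passes merge each pass's min/max into the running range, so the
    // observed range here is [-77, 98]. Under the asymmetric U8 mapping described at
    // the top of the file, that is expected to give scale = (98 - (-77)) / 255 = 175 / 255
    // and an offset of about round(77 / scale) = 112 (exact rounding is up to the scheme).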
    class TestOutputStrategy : public IStrategy
    {
    public:
        TestOutputStrategy(const OffsetScalePair& offsetScalePair, const DataType& dataType)
            : m_OffsetScalePair(offsetScalePair)
            , m_DataType(dataType)
        {}

        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id) override
        {
            IgnoreUnused(name, constants, id, descriptor);
            switch (layer->GetType())
            {
                case armnn::LayerType::Output :
                {
                    const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
                    BOOST_CHECK_MESSAGE(info.GetDataType() == m_DataType,
                                        std::string(armnn::GetDataTypeName(info.GetDataType()))
                                            .append(" == ").append(armnn::GetDataTypeName(m_DataType)));
                    // int32_t
                    BOOST_CHECK(info.GetQuantizationOffset() == m_OffsetScalePair.second);
                    // float
                    BOOST_TEST(info.GetQuantizationScale() == m_OffsetScalePair.first,
                               boost::test_tools::tolerance(0.001));
                    break;
                }
                default:
                {}
            }
        }

    private:
        const OffsetScalePair m_OffsetScalePair;
        const DataType m_DataType;
    };

    TestOutputStrategy strategy(qParams, quantizationScheme->GetDataType());
    quantizedNetwork->ExecuteStrategy(strategy);
}

BOOST_AUTO_TEST_CASE(QuantizeAbsActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Abs;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeArgMax)
{
    ArgMinMaxDescriptor descriptor;
    descriptor.m_Function = ArgMinMaxFunction::Max;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithArgMinMaxLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeLinearActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Linear;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeReLuActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::ReLu;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeSoftReLuActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::SoftReLu;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeBoundedReluActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::BoundedReLu;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeTanHActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::TanH;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeLeakyReLuActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::LeakyReLu;
    descriptor.m_A        = 3.5f;
    descriptor.m_B        = -10.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeELuActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Elu;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeHardSwishActivation)
{
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::HardSwish;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithActivationLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeBatchNorm)
{
    INetworkPtr network = INetwork::Create();

    const TensorShape shape{3U};
    TensorInfo info(shape, DataType::Float32);

    std::vector<float> meanData{-1.0f, 1.5f, 2.0f};
    std::vector<float> varData{-1.0f, 1.5f, 2.0f};
    std::vector<float> betaData{-1.0f, 1.5f, 2.0f};
    std::vector<float> gammaData{-1.0f, 1.5f, 2.0f};

    ConstTensor mean(info, meanData);
    ConstTensor var(info, varData);
    ConstTensor beta(info, betaData);
    ConstTensor gamma(info, gammaData);

    BatchNormalizationDescriptor desc;

    // Add the layers
    IConnectableLayer* input0    = network->AddInputLayer(0);
    IConnectableLayer* batchNorm = network->AddBatchNormalizationLayer(desc, mean, var, beta, gamma);
    IConnectableLayer* output    = network->AddOutputLayer(1);

    // Establish connections
    input0->GetOutputSlot(0).Connect(batchNorm->GetInputSlot(0));
    batchNorm->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    batchNorm->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeDepthToSpace)
{
    const TensorShape inputShape { 1, 2, 2, 4 };
    const TensorShape outputShape{ 1, 4, 4, 1 };

    const TensorInfo inputInfo (inputShape, DataType::Float32);
    const TensorInfo outputInfo(outputShape, DataType::Float32);

    INetworkPtr network = INetwork::Create();
    const DepthToSpaceDescriptor descriptor(2, armnn::DataLayout::NHWC);

    IConnectableLayer* inputLayer        = network->AddInputLayer(0);
    IConnectableLayer* depthToSpaceLayer = network->AddDepthToSpaceLayer(descriptor);
    IConnectableLayer* outputLayer       = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(depthToSpaceLayer->GetInputSlot(0));
    depthToSpaceLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    depthToSpaceLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    TestNetwork(network.get(), inputShape, outputShape);
}

BOOST_AUTO_TEST_CASE(OverrideInputRangeEmptyNetwork)
{
    RangeTracker ranges;
    RangeTracker::MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override

    NetworkImpl network; // Empty network
    auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers

    OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange);
    ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy);

    BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched
}

BOOST_AUTO_TEST_CASE(OverrideInputRangeNoInputLayers)
{
    RangeTracker ranges;
    MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override

    NetworkImpl network;
    network.AddAdditionLayer(); // Network with no input layers
    auto inputLayers = network.GetGraph().GetInputLayers(); // Empty list of input layers

    OverrideInputRangeStrategy overrideInputRangeStrategy(ranges, 0, minMaxRange);
    ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy);

    BOOST_CHECK(ranges.IsEmpty()); // Check that the map of ranges remained untouched
}
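// RangeTracker keys observed ranges by layer GUID, and OverrideInputRangeStrategy
// only writes an entry for the input layer whose binding id matches the one it was
// constructed with; the next test exercises both the miss and the hit case.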
BOOST_AUTO_TEST_CASE(OverrideInputRangeInputLayers)
{
    RangeTracker ranges;
    MinMaxRange minMaxRange(-12.3f, 45.6f); // Range to use for the override

    NetworkImpl network;

    // Adding the layers
    IConnectableLayer* input0   = network.AddInputLayer(0);
    IConnectableLayer* input1   = network.AddInputLayer(1);
    IConnectableLayer* addition = network.AddAdditionLayer();
    IConnectableLayer* output   = network.AddOutputLayer(2);

    // Connecting the layers
    input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1));
    addition->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Setting the TensorInfos
    TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    addition->GetOutputSlot(0).SetTensorInfo(info);

    auto inputLayers = network.GetGraph().GetInputLayers(); // List of input layers

    // Trying to override the input range for the input layer with binding id 3 (does not exist in the network)
    OverrideInputRangeStrategy overrideInputRangeStrategy3(ranges, 3, minMaxRange);
    ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy3);

    // Check that the map of ranges remained untouched
    BOOST_CHECK(ranges.IsEmpty());

    // Override the input range for the input layer with binding id 1
    OverrideInputRangeStrategy overrideInputRangeStrategy1(ranges, 1, minMaxRange);
    ApplyStrategyToLayers(inputLayers, overrideInputRangeStrategy1);

    // Check that the map of ranges has been populated
    BOOST_CHECK(!ranges.IsEmpty());

    // Check that an entry for the input layer with binding id 0 does not exist
    BOOST_CHECK(!ranges.HasRanges(input0->GetGuid()));

    // Check that an entry for the input layer with binding id 1 exists
    BOOST_CHECK(ranges.HasRanges(input1->GetGuid()));

    // Check that the overridden values are what we intended to set
    BOOST_CHECK(ranges.GetRange(input1->GetGuid(), 0) == minMaxRange);
}

INetworkPtr CreateNetworkWithFullyConnectedLayer(const bool biasEnabled,
                                                 const TensorShape& inputShape,
                                                 const TensorShape& outputShape)
{
    FullyConnectedDescriptor desc;
    desc.m_BiasEnabled = biasEnabled;
    INetworkPtr network = INetwork::Create();

    const TensorInfo info(inputShape, DataType::Float32);
    const TensorInfo outputInfo(outputShape, DataType::Float32);

    std::vector<float> weightsData{-1.0f, 1.5f, 2.0f};
    ConstTensor weights(info, weightsData);

    // Add the layers
    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* fullyConnected;
    Optional<ConstTensor> optionalBias;
    std::vector<float> biasData{10.0f, 20.0f, 30.0f};
    if (desc.m_BiasEnabled)
    {
        ConstTensor bias(info, biasData);
        optionalBias = Optional<ConstTensor>(bias);
    }
    fullyConnected = network->AddFullyConnectedLayer(desc, weights, optionalBias);
    IConnectableLayer* output = network->AddOutputLayer(1);

    // Establish connections
    input0->GetOutputSlot(0).Connect(fullyConnected->GetInputSlot(0));
    fullyConnected->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    fullyConnected->GetOutputSlot(0).SetTensorInfo(outputInfo);

    return network;
}

void ValidateFullyConnectedLayer(const bool biasEnabled)
{
    const TensorShape shape{3U};
    INetworkPtr network = CreateNetworkWithFullyConnectedLayer(biasEnabled, shape, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeFill)
{
    const TensorShape tensorShape{ 1U };
    const TensorInfo tensorInfo(tensorShape, DataType::Float32);

    INetworkPtr network = INetwork::Create();

    FillDescriptor descriptor;
    descriptor.m_Value = 1;

    IConnectableLayer* inputLayer  = network->AddInputLayer(0);
    IConnectableLayer* fillLayer   = network->AddFillLayer(descriptor);
    IConnectableLayer* outputLayer = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(fillLayer->GetInputSlot(0));
    fillLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    fillLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    TestNetwork(network.get(), tensorShape);
}

BOOST_AUTO_TEST_CASE(QuantizeFullyConnected)
{
    ValidateFullyConnectedLayer(false);
}

BOOST_AUTO_TEST_CASE(QuantizeFullyConnectedBiasEnabled)
{
    ValidateFullyConnectedLayer(true);
}

void TestQuantizeConvolution2d(bool useBiases)
{
    INetworkPtr network = INetwork::Create();

    TensorShape shape{3U};
    TensorInfo info(shape, DataType::Float32);

    std::vector<float> weightsData{-1.0f, 1.5f, 2.0f};
    ConstTensor weights(info, weightsData);

    Convolution2dDescriptor descriptor;
    descriptor.m_BiasEnabled = useBiases;

    // Add the layers
    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* conv2d;
    Optional<ConstTensor> optionalBiases;
    std::vector<float> biasesData{-1.0f, 1.5f, 2.0f};
    if (useBiases)
    {
        ConstTensor biases(info, biasesData);
        optionalBiases = Optional<ConstTensor>(biases);
    }
    conv2d = network->AddConvolution2dLayer(descriptor, weights, optionalBiases);
    IConnectableLayer* output = network->AddOutputLayer(1);

    // Establish connections
    input0->GetOutputSlot(0).Connect(conv2d->GetInputSlot(0));
    conv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    conv2d->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeConvolution2d)
{
    TestQuantizeConvolution2d(false);
}

BOOST_AUTO_TEST_CASE(QuantizeConvolution2dWithBiases)
{
    TestQuantizeConvolution2d(true);
}

void TestQuantizeDepthwiseConvolution2d(bool useBiases)
{
    INetworkPtr network = INetwork::Create();

    TensorShape shape{3U};
    TensorInfo info(shape, DataType::Float32);

    std::vector<float> weightsData{-1.0f, 1.5f, 2.0f};
    ConstTensor weights(info, weightsData);

    DepthwiseConvolution2dDescriptor descriptor;
    descriptor.m_BiasEnabled = useBiases;

    // Add the layers
    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* depthwiseConv2d;
    Optional<ConstTensor> optionalBiases;
    std::vector<float> biasesData{-1.0f, 1.5f, 2.0f};
    if (useBiases)
    {
        ConstTensor biases(info, biasesData);
        optionalBiases = Optional<ConstTensor>(biases);
    }
    depthwiseConv2d = network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBiases);
    IConnectableLayer* output = network->AddOutputLayer(1);

    // Establish connections
    input0->GetOutputSlot(0).Connect(depthwiseConv2d->GetInputSlot(0));
    depthwiseConv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    depthwiseConv2d->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2d)
{
    TestQuantizeDepthwiseConvolution2d(false);
}

BOOST_AUTO_TEST_CASE(QuantizeDepthwiseConvolution2dWithBiases)
{
    TestQuantizeDepthwiseConvolution2d(true);
}

BOOST_AUTO_TEST_CASE(QuantizeInstanceNormalization)
{
    const TensorShape shape{ 1, 4, 4, 1 };
    const TensorInfo tensorInfo(shape, DataType::Float32);

    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer        = network->AddInputLayer(0);
    IConnectableLayer* instanceNormLayer = network->AddInstanceNormalizationLayer(InstanceNormalizationDescriptor());
    IConnectableLayer* outputLayer       = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(instanceNormLayer->GetInputSlot(0));
    instanceNormLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    instanceNormLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeLogSoftmax)
{
    const TensorShape tensorShape{ 1U };
    const TensorInfo tensorInfo(tensorShape, DataType::Float32);

    INetworkPtr network = INetwork::Create();

    LogSoftmaxDescriptor descriptor;
    descriptor.m_Beta = 1.0f;

    IConnectableLayer* inputLayer      = network->AddInputLayer(0);
    IConnectableLayer* logSoftmaxLayer = network->AddLogSoftmaxLayer(descriptor);
    IConnectableLayer* outputLayer     = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(logSoftmaxLayer->GetInputSlot(0));
    logSoftmaxLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    logSoftmaxLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    TestNetwork(network.get(), tensorShape);
}

INetworkPtr CreateNetworkWithSoftmaxLayer(const SoftmaxDescriptor& descriptor, const TensorShape& shape)
{
    INetworkPtr network = INetwork::Create();

    // Add the layers
    IConnectableLayer* input0  = network->AddInputLayer(0);
    IConnectableLayer* softmax = network->AddSoftmaxLayer(descriptor);
    IConnectableLayer* output  = network->AddOutputLayer(2);

    // Establish connections
    input0->GetOutputSlot(0).Connect(softmax->GetInputSlot(0));
    softmax->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    TensorInfo info(shape, DataType::Float32);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    softmax->GetOutputSlot(0).SetTensorInfo(info);

    return network;
}

BOOST_AUTO_TEST_CASE(QuantizeSoftmax)
{
    SoftmaxDescriptor descriptor;
    descriptor.m_Beta = 1.0f;

    const TensorShape shape{1U};
    INetworkPtr network = CreateNetworkWithSoftmaxLayer(descriptor, shape);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeStandIn)
{
    const TensorShape tensorShape{ 1U };
    const TensorInfo tensorInfo(tensorShape, DataType::Float32);

    INetworkPtr network = INetwork::Create();

    StandInDescriptor descriptor;
    descriptor.m_NumInputs  = 1;
    descriptor.m_NumOutputs = 1;

    IConnectableLayer* inputLayer   = network->AddInputLayer(0);
    IConnectableLayer* standInLayer = network->AddStandInLayer(descriptor);
    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(standInLayer->GetInputSlot(0));
    standInLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    standInLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    // test QAsymmU8 quantization
    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get())->ExportNetwork(),
                      armnn::UnimplementedException);

    // test QAsymmS8 quantization
    const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8);
    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(),
                      armnn::UnimplementedException);

    // test QSymmS8 quantization
    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(),
                      armnn::UnimplementedException);

    // test QSymmS16 quantization
    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
    BOOST_CHECK_THROW(INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(),
                      armnn::UnimplementedException);
}
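// The next two helpers bracket a layer under test between a LeakyReLu activation and
// an output layer. LeakyReLu's default static range is [-5.0f, 15.0f], which is what
// CheckDefaultQuantizationSettings expects for the pass-through layers it feeds: for
// QAsymmU8, scale = (15 - (-5)) / 255 = 20 / 255 and offset = round(5 / scale) = 64.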
IConnectableLayer* CreateStartOfLeakyReluNetwork(INetwork* network, const TensorInfo& info)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
    activationDescriptor.m_A        = 3.5f;
    activationDescriptor.m_B        = -10.0f;

    // Add the layers
    IConnectableLayer* input0     = network->AddInputLayer(0);
    IConnectableLayer* activation = network->AddActivationLayer(activationDescriptor);

    // Establish connections
    input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    activation->GetOutputSlot(0).SetTensorInfo(info);

    return activation;
}

void CompleteLeakyReluNetwork(INetwork* network,
                              IConnectableLayer* activation,
                              IConnectableLayer* layerUnderTest,
                              const TensorInfo& info)
{
    // Add the output layer
    IConnectableLayer* output = network->AddOutputLayer(3);

    // Establish connections
    activation->GetOutputSlot(0).Connect(layerUnderTest->GetInputSlot(0));
    layerUnderTest->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    layerUnderTest->GetOutputSlot(0).SetTensorInfo(info);
}

BOOST_AUTO_TEST_CASE(QuantizePermute)
{
    INetworkPtr network = INetwork::Create();

    const TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);

    IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info);

    // Add the layer under test
    PermuteDescriptor desc;
    IConnectableLayer* permute = network->AddPermuteLayer(desc);

    CompleteLeakyReluNetwork(network.get(), activation, permute, info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeSpaceToBatch)
{
    INetworkPtr network = INetwork::Create();

    const TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);

    IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info);

    // Add the layer under test
    SpaceToBatchNdDescriptor descriptor;
    IConnectableLayer* spaceToBatch = network->AddSpaceToBatchNdLayer(descriptor);

    CompleteLeakyReluNetwork(network.get(), activation, spaceToBatch, info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeSpaceToDepth)
{
    INetworkPtr network = INetwork::Create();

    const TensorShape shape{ 1u };
    TensorInfo info(shape, DataType::Float32);

    IConnectableLayer* activation   = CreateStartOfLeakyReluNetwork(network.get(), info);
    IConnectableLayer* spaceToDepth = network->AddSpaceToDepthLayer(SpaceToDepthDescriptor());

    CompleteLeakyReluNetwork(network.get(), activation, spaceToDepth, info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizePooling2d)
{
    auto network = INetwork::Create();

    TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);

    Pooling2dDescriptor desc;
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::LeakyReLu;
    activationDescriptor.m_A        = 3.5f;
    activationDescriptor.m_B        = -10.0f;

    // Add the layers
    IConnectableLayer* input0     = network->AddInputLayer(0);
    IConnectableLayer* activation = network->AddActivationLayer(activationDescriptor);
    IConnectableLayer* pooling2d  = network->AddPooling2dLayer(desc);
    IConnectableLayer* output     = network->AddOutputLayer(3);

    // Establish connections
    input0->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(pooling2d->GetInputSlot(0));
    pooling2d->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo
    input0->GetOutputSlot(0).SetTensorInfo(info);
    activation->GetOutputSlot(0).SetTensorInfo(info);
    pooling2d->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeConstant)
{
    INetworkPtr network = INetwork::Create();

    // Constant layer data
    std::vector<float> data = {-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
    const TensorShape shape{1U, 1U, 3U, 3U};
    TensorInfo tensorInfo(shape, DataType::Float32);
    ConstTensor constantTensor(tensorInfo, data);

    // Add the layers
    IConnectableLayer* input    = network->AddInputLayer(0);
    IConnectableLayer* constant = network->AddConstantLayer(constantTensor);
    IConnectableLayer* addition = network->AddAdditionLayer();
    IConnectableLayer* output   = network->AddOutputLayer(1);

    // Establish connections
    input->GetOutputSlot(0).Connect(addition->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(addition->GetInputSlot(1));
    addition->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set TensorInfo in the remaining layers
    input->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    addition->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    TestNetwork(network.get(), shape);
}

BOOST_AUTO_TEST_CASE(QuantizeArgMinMax)
{
    INetworkPtr network = INetwork::Create();

    const TensorShape inputShape{ 1, 1, 1, 5 };
    const TensorShape outputShape{ 1, 1, 1 };

    TensorInfo inputInfo(inputShape, DataType::Float32);
    TensorInfo outputInfo(outputShape, DataType::Float32);

    // Add the input layers
    IConnectableLayer* input = network->AddInputLayer(0);

    // Add the layer under test
    ArgMinMaxDescriptor argMinMaxDescriptor;
    argMinMaxDescriptor.m_Function = ArgMinMaxFunction::Max;
    IConnectableLayer* argMinMaxLayer = network->AddArgMinMaxLayer(argMinMaxDescriptor);

    // Add the output layers
    IConnectableLayer* output = network->AddOutputLayer(1);

    // Establish connections
    input->GetOutputSlot(0).Connect(argMinMaxLayer->GetInputSlot(0));
    argMinMaxLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set tensor info
    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
    argMinMaxLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    TestNetwork(network.get(), inputShape, outputShape);
}

BOOST_AUTO_TEST_CASE(QuantizeComparison)
{
    const TensorShape tensorShape{ 1u };
    const TensorInfo tensorInfo(tensorShape, DataType::Float32);

    INetworkPtr network = INetwork::Create();
    ComparisonDescriptor descriptor(ComparisonOperation::LessOrEqual);

    IConnectableLayer* inputLayer0     = network->AddInputLayer(0);
    IConnectableLayer* inputLayer1     = network->AddInputLayer(1);
    IConnectableLayer* comparisonLayer = network->AddComparisonLayer(descriptor);
    IConnectableLayer* outputLayer     = network->AddOutputLayer(0);

    inputLayer0->GetOutputSlot(0).Connect(comparisonLayer->GetInputSlot(0));
    inputLayer1->GetOutputSlot(0).Connect(comparisonLayer->GetInputSlot(1));
    comparisonLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer0->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    inputLayer1->GetOutputSlot(0).SetTensorInfo(tensorInfo);
    comparisonLayer->GetOutputSlot(0).SetTensorInfo(tensorInfo);

    TestNetwork(network.get(), tensorShape);
}

BOOST_AUTO_TEST_CASE(QuantizeConcat)
{
    class TestConcatQuantization : public TestQuantization
    {
    public:
        TestConcatQuantization(const TensorShape& inputShape, const TensorShape& outputShape)
            : TestQuantization(inputShape, outputShape)
        {}

        TestConcatQuantization(const QuantizerOptions& options,
                               const TensorShape& inputShape,
                               const TensorShape& outputShape)
            : TestQuantization(options, inputShape, outputShape)
        {}

        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id) override
        {
            IgnoreUnused(name, constants, id, descriptor);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input :
                    break;
                case armnn::LayerType::Output :
                    break;
                case armnn::LayerType::Concat :
                {
                    TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo();
                    TestQuantizationParams(outputInfo,
                                           {60.8f / g_AsymmU8QuantizationBase, 65},
                                           {60.8f / g_SymmS8QuantizationBase, -63},
                                           {45.3f / g_SymmS8QuantizationBase, 0},
                                           {45.3f / g_SymmS16QuantizationBase, 0});

                    TensorInfo inputInfo0 = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
                    TensorInfo inputInfo1 = layer->GetInputSlot(1).GetConnection()->GetTensorInfo();
                    TensorInfo inputInfo2 = layer->GetInputSlot(2).GetConnection()->GetTensorInfo();

                    TestDifferentQuantizationScale(inputInfo0, inputInfo1);
                    TestDifferentQuantizationScale(inputInfo0, inputInfo2);
                    TestDifferentQuantizationScale(inputInfo1, inputInfo2);
                    TestDifferentQuantizationScale(inputInfo0, outputInfo);
                    break;
                }
                default:
                {}
            }
        }
    };

    INetworkPtr network = INetwork::Create();

    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* input1 = network->AddInputLayer(1);
    IConnectableLayer* input2 = network->AddInputLayer(2);

    OriginsDescriptor descriptor(3, 1);
    IConnectableLayer* concatLayer = network->AddConcatLayer(descriptor);

    IConnectableLayer* output0 = network->AddOutputLayer(3);

    // Establish connections
    input0->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(1));
    input2->GetOutputSlot(0).Connect(concatLayer->GetInputSlot(2));
    concatLayer->GetOutputSlot(0).Connect(output0->GetInputSlot(0));

    // Set TensorInfo
    const TensorShape shape{1U};
    TensorInfo info(shape, DataType::Float32);

    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    input2->GetOutputSlot(0).SetTensorInfo(info);
    concatLayer->GetOutputSlot(0).SetTensorInfo(info);

    const QuantizerOptions qSymmS8Options(DataType::QSymmS8);
    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
    INetworkQuantizerPtr quantizerPtrQAsymmU8 = INetworkQuantizer::Create(network.get());
    INetworkQuantizerPtr quantizerPtrQSymmS8  = INetworkQuantizer::Create(network.get(), qSymmS8Options);
    INetworkQuantizerPtr quantizerPtrQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options);

    // Override the input ranges
    float min = -15.5f;
    float max =  45.3f;

    quantizerPtrQAsymmU8->OverrideInputRange(0, (min + 2.1f), (max - 3.2f));
    quantizerPtrQAsymmU8->OverrideInputRange(1, (min + 6.7f), max);
    quantizerPtrQAsymmU8->OverrideInputRange(2, min, (max - 7.8f));

    quantizerPtrQSymmS8->OverrideInputRange(0, (min + 2.1f), (max - 3.2f));
    quantizerPtrQSymmS8->OverrideInputRange(1, (min + 6.7f), max);
    quantizerPtrQSymmS8->OverrideInputRange(2, min, (max - 7.8f));

    quantizerPtrQSymmS16->OverrideInputRange(0, (min + 2.1f), (max - 3.2f));
    quantizerPtrQSymmS16->OverrideInputRange(1, (min + 6.7f), max);
    quantizerPtrQSymmS16->OverrideInputRange(2, min, (max - 7.8f));

    INetworkPtr quantizedNetworkQAsymmU8 = quantizerPtrQAsymmU8->ExportNetwork();
    TestConcatQuantization validatorQAsymmU8(shape, shape);
    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);

    INetworkPtr quantizedNetworkQSymmS8 = quantizerPtrQSymmS8->ExportNetwork();
    TestConcatQuantization validatorQSymmS8(qSymmS8Options, shape, shape);
    VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8);

    INetworkPtr quantizedNetworkQSymmS16 = quantizerPtrQSymmS16->ExportNetwork();
    TestConcatQuantization validatorQSymmS16(qSymmS16options, shape, shape);
    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
}
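// In QuantizeConcat above, the concat output range is the union of the overridden
// input ranges, [-15.5f, 45.3f]: a span of 60.8f, which for QAsymmU8 gives the
// expected scale 60.8f / 255.0f and offset round(15.5f / scale) = 65.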
BOOST_AUTO_TEST_CASE(QuantizeReshape) { INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; TensorInfo info(shape, DataType::Float32); IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); // Add the layer under test ReshapeDescriptor descriptor({1, 2, 3, 4}); IConnectableLayer* reshape = network->AddReshapeLayer(descriptor); CompleteLeakyReluNetwork(network.get(), activation, reshape, info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeSplitter) { INetworkPtr network = INetwork::Create(); const TensorShape shape{3U}; TensorInfo info(shape, DataType::Float32); IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); // Add the layer under test ViewsDescriptor splitterDesc(2,4); IConnectableLayer* splitter = network->AddSplitterLayer(splitterDesc); CompleteLeakyReluNetwork(network.get(), activation, splitter, info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeResize) { INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; TensorInfo info(shape, DataType::Float32); IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); // Add the layer under test ResizeDescriptor descriptor; descriptor.m_TargetHeight = 3; descriptor.m_TargetWidth = 3; IConnectableLayer* resizeLayer = network->AddResizeLayer(descriptor); CompleteLeakyReluNetwork(network.get(), activation, resizeLayer, info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeStridedSlice) { INetworkPtr network = INetwork::Create(); const TensorShape shape{3U}; TensorInfo info(shape, DataType::Float32); IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); // Add the layer under test StridedSliceDescriptor stridedSliceDesc; IConnectableLayer* stridedSlice = network->AddStridedSliceLayer(stridedSliceDesc); CompleteLeakyReluNetwork(network.get(), activation, stridedSlice, info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeBatchToSpace) { INetworkPtr network = INetwork::Create(); const TensorShape shape{1U}; TensorInfo info(shape, DataType::Float32); IConnectableLayer* activation = CreateStartOfLeakyReluNetwork(network.get(), info); // Add the layer under test BatchToSpaceNdDescriptor descriptor; IConnectableLayer* batchToSpace = network->AddBatchToSpaceNdLayer(descriptor); CompleteLeakyReluNetwork(network.get(), activation, batchToSpace, info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizePrelu) { class TestPreluQuantization : public TestQuantization { public: TestPreluQuantization(const TensorShape& inputShape, const TensorShape& alphaShape, const TensorShape& outputShape) : TestQuantization(inputShape, outputShape) , m_AlphaShape(alphaShape) {} TestPreluQuantization(const QuantizerOptions& options, const TensorShape& inputShape, const TensorShape& alphaShape, const TensorShape& outputShape) : TestQuantization(options, inputShape, outputShape) , m_AlphaShape(alphaShape) {} void ExecuteStrategy(const armnn::IConnectableLayer* layer, const BaseDescriptor& descriptor, const std::vector& constants, const char* name, const armnn::LayerBindingId id) override { IgnoreUnused(name, constants, id, descriptor); switch (layer->GetType()) { case armnn::LayerType::Input : { const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); switch (id) { case 0: // Input BOOST_TEST(m_InputShape == info.GetShape()); break; case 1: // Alpha BOOST_TEST(m_AlphaShape == info.GetShape()); break; default: throw 
InvalidArgumentException("Invalid layer binding id for PReLU layer"); } // Based off current default [-15.0f, 15.0f] TestQuantizationParams(info, { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8 { 30.0f / g_AsymmS8QuantizationBase, 0}, // QASymmS8 { 15.0f / g_SymmS8QuantizationBase, 0}, // QSymmS8 { 15.0f / g_SymmS16QuantizationBase, 0 }); // QSymmS16 break; } case armnn::LayerType::Output : { const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo(); BOOST_TEST(m_OutputShape == info.GetShape()); break; } case armnn::LayerType::Prelu : { const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo(); TestQuantizationParams(info, { 30.0f / g_AsymmU8QuantizationBase, 128 }, // QASymmU8 { 30.0f / g_AsymmS8QuantizationBase, 0}, // QAsymmS8 { 15.0f / g_SymmS8QuantizationBase, 0}, // QSymmS8 { 15.0f / g_SymmS16QuantizationBase, 0 }); // QSymmS16 break; } default: {} } } private: TensorShape m_AlphaShape; }; INetworkPtr network = INetwork::Create(); const TensorShape inputShape{ 4, 1, 2 }; const TensorShape alphaShape{ 5, 4, 3, 1 }; const TensorShape outputShape{ 5, 4, 3, 2 }; TensorInfo inputInfo(inputShape, DataType::Float32); TensorInfo alphaInfo(alphaShape, DataType::Float32); TensorInfo outputInfo(outputShape, DataType::Float32); // Add the input layers IConnectableLayer* input = network->AddInputLayer(0); IConnectableLayer* alpha = network->AddInputLayer(1); // Add the layer under test IConnectableLayer* prelu = network->AddPreluLayer("prelu"); // Add the output layers IConnectableLayer* output = network->AddOutputLayer(0); // Establish connections input->GetOutputSlot(0).Connect(prelu->GetInputSlot(0)); alpha->GetOutputSlot(0).Connect(prelu->GetInputSlot(1)); prelu->GetOutputSlot(0).Connect(output->GetInputSlot(0)); // Set tensor info input->GetOutputSlot(0).SetTensorInfo(inputInfo); alpha->GetOutputSlot(0).SetTensorInfo(alphaInfo); prelu->GetOutputSlot(0).SetTensorInfo(outputInfo); INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); TestPreluQuantization validatorQAsymmU8(inputShape, alphaShape, outputShape); VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); TestPreluQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, alphaShape, outputShape); VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); const QuantizerOptions qSymmS8Options(DataType::QSymmS8); INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); TestPreluQuantization validatorQSymmS8(qSymmS8Options, inputShape, alphaShape, outputShape); VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); const QuantizerOptions qSymmS16options(DataType::QSymmS16); INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork(); TestPreluQuantization validatorQSymmS16(qSymmS16options, inputShape, alphaShape, outputShape); VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16); } void TestQuantizeTransposeConvolution2d(bool useBiases) { INetworkPtr network = INetwork::Create(); TensorShape shape{ 3 }; TensorInfo info(shape, DataType::Float32); std::initializer_list floatData{ -1.0f, 1.5f, 2.0f }; std::vector weightsData(floatData); ConstTensor weights(info, weightsData); 
TransposeConvolution2dDescriptor descriptor; descriptor.m_BiasEnabled = useBiases; // construct network IConnectableLayer* input = network->AddInputLayer(0); Optional optionalBiases; std::vector biasesData(floatData); if (useBiases) { ConstTensor biases(info, biasesData); optionalBiases = Optional(biases); } IConnectableLayer* transposeConv2d = network->AddTransposeConvolution2dLayer(descriptor, weights, optionalBiases); IConnectableLayer* output = network->AddOutputLayer(1); input->GetOutputSlot(0).Connect(transposeConv2d->GetInputSlot(0)); transposeConv2d->GetOutputSlot(0).Connect(output->GetInputSlot(0)); input->GetOutputSlot(0).SetTensorInfo(info); transposeConv2d->GetOutputSlot(0).SetTensorInfo(info); TestNetwork(network.get(), shape); } BOOST_AUTO_TEST_CASE(QuantizeTransposeConvolution2d) { TestQuantizeTransposeConvolution2d(false); } BOOST_AUTO_TEST_CASE(QuantizeTransposeConvolution2dWithBiases) { TestQuantizeTransposeConvolution2d(true); } BOOST_AUTO_TEST_CASE(QuantizeStack) { class TestStackQuantization : public TestQuantization { public: TestStackQuantization(const TensorShape& inputShape, const TensorShape& outputShape) : TestQuantization(inputShape, outputShape) {} TestStackQuantization(const QuantizerOptions& options, const TensorShape& inputShape, const TensorShape& outputShape) : TestQuantization(options, inputShape, outputShape) {} void ExecuteStrategy(const armnn::IConnectableLayer* layer, const BaseDescriptor& descriptor, const std::vector& constants, const char* name, const armnn::LayerBindingId id) override { IgnoreUnused(name, constants, id, descriptor); switch (layer->GetType()) { case armnn::LayerType::Input : { break; } case armnn::LayerType::Output : { break; } case armnn::LayerType::Stack : { TensorInfo outputInfo = layer->GetOutputSlot(0).GetTensorInfo(); TestQuantizationParams(outputInfo, { 30.0f / g_AsymmU8QuantizationBase, 128 }, { 30.0f / g_AsymmS8QuantizationBase, 0}, { 15.0f / g_SymmS8QuantizationBase, 0}, { 15.0f / g_SymmS16QuantizationBase, 0 }); break; } default: {} } } }; INetworkPtr network = INetwork::Create(); IConnectableLayer* input0 = network->AddInputLayer(0); IConnectableLayer* input1 = network->AddInputLayer(1); const TensorShape inputShape{ 3, 4, 5 }; const TensorShape outputShape{ 3, 4, 2, 5 }; StackDescriptor descriptor(2, 2, inputShape); IConnectableLayer* stackLayer = network->AddStackLayer(descriptor); IConnectableLayer* output = network->AddOutputLayer(0); input0->GetOutputSlot(0).Connect(stackLayer->GetInputSlot(0)); input1->GetOutputSlot(0).Connect(stackLayer->GetInputSlot(1)); stackLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0)); INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get())->ExportNetwork(); TestStackQuantization validatorQAsymmU8(inputShape, outputShape); VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8); const QuantizerOptions qAsymmS8Options(DataType::QAsymmS8); INetworkPtr quantizedNetworkQAsymmS8 = INetworkQuantizer::Create(network.get(), qAsymmS8Options)->ExportNetwork(); TestStackQuantization validatorQAsymmS8(qAsymmS8Options, inputShape, inputShape); VisitLayersTopologically(quantizedNetworkQAsymmS8.get(), validatorQAsymmS8); const QuantizerOptions qSymmS8Options(DataType::QSymmS8); INetworkPtr quantizedNetworkQSymmS8 = INetworkQuantizer::Create(network.get(), qSymmS8Options)->ExportNetwork(); TestStackQuantization validatorQSymmS8(qSymmS8Options, inputShape, inputShape); VisitLayersTopologically(quantizedNetworkQSymmS8.get(), validatorQSymmS8); const 
    const QuantizerOptions qSymmS16options(DataType::QSymmS16);
    INetworkPtr quantizedNetworkQSymmS16 = INetworkQuantizer::Create(network.get(), qSymmS16options)->ExportNetwork();
    TestStackQuantization validatorQSymmS16(qSymmS16options, inputShape, outputShape);
    VisitLayersTopologically(quantizedNetworkQSymmS16.get(), validatorQSymmS16);
}

BOOST_AUTO_TEST_CASE(QuantizeSlice)
{
    TensorShape shape{ 3 };
    TensorInfo info(shape, DataType::Float32);

    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer  = network->AddInputLayer(0);
    IConnectableLayer* sliceLayer  = network->AddSliceLayer(SliceDescriptor());
    IConnectableLayer* outputLayer = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(sliceLayer->GetInputSlot(0));
    sliceLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(info);
    sliceLayer->GetOutputSlot(0).SetTensorInfo(info);

    TestNetwork(network.get(), shape);
}

// Helper: quantizes a 4-element float vector with scale 1.0 and offset 1, so the
// QuantizeInf tests below can check that infinities clamp to the ends of the U8 range.
std::vector<uint8_t> SetupQuantize(float value)
{
    armnn::TensorInfo inputInfo({ 1, 2, 2 }, armnn::DataType::Float32);
    inputInfo.SetQuantizationScale(1.0f);
    inputInfo.SetQuantizationOffset(1);

    std::vector<float> input({ value, 0.0f, 0.0f, 1.0f });
    const std::vector<float>& inputRef = input;

    auto output = armnnUtils::QuantizedVector<uint8_t>(inputRef,
                                                       inputInfo.GetQuantizationScale(),
                                                       inputInfo.GetQuantizationOffset());
    return output;
}

BOOST_AUTO_TEST_CASE(QuantizeInf)
{
    BOOST_CHECK_EQUAL(SetupQuantize(std::numeric_limits<float>::infinity())[0], 255);
}

BOOST_AUTO_TEST_CASE(QuantizeNegativeInf)
{
    BOOST_CHECK_EQUAL(SetupQuantize(-1 * std::numeric_limits<float>::infinity())[0], 0);
}

class TestPreserveType : public TestQuantization
{
public:
    TestPreserveType(const QuantizerOptions& options,
                     const DataType& dataType,
                     const TensorShape& inputShape,
                     const TensorShape& outputShape)
        : TestQuantization(options, inputShape, outputShape)
        , m_DataType(dataType)
        , m_VisitedQuantizeLayer(false)
        , m_VisitedDequantizeLayer(false) {}

    void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                         const BaseDescriptor& descriptor,
                         const std::vector<armnn::ConstTensor>& constants,
                         const char* name,
                         const armnn::LayerBindingId id) override
    {
        IgnoreUnused(name, constants, id, descriptor);
        switch (layer->GetType())
        {
            case armnn::LayerType::Input :
            {
                const TensorInfo& info = layer->GetOutputSlot(0).GetTensorInfo();
                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
                BOOST_TEST(m_InputShape == info.GetShape());
                break;
            }
            case armnn::LayerType::Output :
            {
                const TensorInfo& info = layer->GetInputSlot(0).GetConnection()->GetTensorInfo();
                BOOST_TEST(GetDataTypeName(info.GetDataType()) == GetDataTypeName(m_DataType));
                BOOST_TEST(m_OutputShape == info.GetShape());
                break;
            }
            case armnn::LayerType::Quantize :
            {
                m_VisitedQuantizeLayer = true;
                break;
            }
            case armnn::LayerType::Dequantize :
            {
                m_VisitedDequantizeLayer = true;
                break;
            }
            default: {}
        }
    }

    void CheckQuantizeDequantizeLayerVisited(bool expected)
    {
        if (expected)
        {
            BOOST_CHECK(m_VisitedQuantizeLayer);
            BOOST_CHECK(m_VisitedDequantizeLayer);
        }
        else
        {
            BOOST_CHECK(!m_VisitedQuantizeLayer);
            BOOST_CHECK(!m_VisitedDequantizeLayer);
        }
    }

private:
    const DataType m_DataType;
    bool m_VisitedQuantizeLayer;
    bool m_VisitedDequantizeLayer;
};

void PreserveTypeTestImpl(const DataType& dataType)
{
    INetworkPtr network = INetwork::Create();

    // Add the layers
    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* input1 = network->AddInputLayer(1);
    IConnectableLayer* addition = network->AddAdditionLayer();
    IConnectableLayer* output = network->AddOutputLayer(2);
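    // Wire inputs -> addition -> output. With the preserve-type option enabled (the
    // 'true' flag in the QuantizerOptions below), a float network is expected to gain
    // a Quantize layer after each input and a Dequantize layer before the output,
    // which TestPreserveType records via its m_Visited* flags.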
    input0->GetOutputSlot(0).Connect(addition->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(addition->GetInputSlot(1));
    addition->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    const TensorShape shape{ 1U, 2U, 3U };
    const TensorInfo info(shape, dataType);
    input0->GetOutputSlot(0).SetTensorInfo(info);
    input1->GetOutputSlot(0).SetTensorInfo(info);
    addition->GetOutputSlot(0).SetTensorInfo(info);

    QuantizerOptions options = dataType == DataType::Float32 ?
                               QuantizerOptions(DataType::QAsymmU8, true) :
                               QuantizerOptions(dataType, true);

    INetworkPtr quantizedNetworkQAsymmU8 = INetworkQuantizer::Create(network.get(), options)->ExportNetwork();
    TestPreserveType validatorQAsymmU8(options, dataType, shape, shape);
    VisitLayersTopologically(quantizedNetworkQAsymmU8.get(), validatorQAsymmU8);
    validatorQAsymmU8.CheckQuantizeDequantizeLayerVisited(
        dataType == DataType::Float32 || dataType == DataType::Float16);
}

BOOST_AUTO_TEST_CASE(PreserveTypeFloat32)
{
    PreserveTypeTestImpl(DataType::Float32);
}

BOOST_AUTO_TEST_CASE(PreserveTypeQAsymmU8)
{
    PreserveTypeTestImpl(DataType::QAsymmU8);
}

BOOST_AUTO_TEST_CASE(PreserveTypeQsymm8)
{
    PreserveTypeTestImpl(DataType::QSymmS8);
}

BOOST_AUTO_TEST_CASE(PreserveTypeQsymm16)
{
    PreserveTypeTestImpl(DataType::QSymmS16);
}

BOOST_AUTO_TEST_CASE(TestConnectionPreservationAfterDynamicQuant)
{
    class TestConnectionPreservation : public IStrategy
    {
    public:
        TestConnectionPreservation(const Graph& graph)
            : m_Graph(graph) {}

        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id) override
        {
            IgnoreUnused(name, constants, id, descriptor);
            switch (layer->GetType())
            {
                case armnn::LayerType::Addition :
                {
                    CheckLayerName(layer->GetInputSlot(0).GetConnection()->GetOwningLayerGuid(), "reLU1");
                    CheckLayerName(layer->GetInputSlot(1).GetConnection()->GetOwningLayerGuid(), "reLU2");
                    break;
                }
                default: {}
            }
        }

        void CheckLayerName(LayerGuid guid, std::string expectedName)
        {
            bool guidFound = false;
            for (Layer* layer : m_Graph)
            {
                if (layer->GetGuid() == guid)
                {
                    BOOST_CHECK_EQUAL(layer->GetName(), expectedName.c_str());
                    guidFound = true;
                    break;
                }
            }
            if (!guidFound)
            {
                BOOST_FAIL("No layer matching the GUID was found");
            }
        }

    private:
        Graph m_Graph;
    };

    class TestNetwork : public INetwork
    {
    public:
        NetworkImpl* GetPNetworkImpl() { return pNetworkImpl.get(); }
    };

    TestNetwork testNetwork;

    IConnectableLayer* inputLayer = testNetwork.AddInputLayer(0, "inputLayer1");
    armnn::ActivationDescriptor ReLUDesc;
    ReLUDesc.m_Function = ActivationFunction::ReLu;

    IConnectableLayer* reLULayer1 = testNetwork.AddActivationLayer(ReLUDesc, "reLU1");
    IConnectableLayer* reLULayer2 = testNetwork.AddActivationLayer(ReLUDesc, "reLU2");
    IConnectableLayer* addLayer1 = testNetwork.AddAdditionLayer("addLayer1");
    IConnectableLayer* outputLayer = testNetwork.AddOutputLayer(0, "outPutLayer1");

    inputLayer->GetOutputSlot(0).Connect(reLULayer1->GetInputSlot(0));
    reLULayer1->GetOutputSlot(0).Connect(reLULayer2->GetInputSlot(0));
    reLULayer1->GetOutputSlot(0).Connect(addLayer1->GetInputSlot(0));
    reLULayer2->GetOutputSlot(0).Connect(addLayer1->GetInputSlot(1));
    addLayer1->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
    reLULayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
    reLULayer2->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));
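    // The addition layer's output info is set below; the graph is then visited once
    // before and once after dynamic (Refine-based) quantization to check that the
    // reLU1/reLU2 -> addLayer1 connections survive the quantization pass.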
    addLayer1->GetOutputSlot(0).SetTensorInfo(TensorInfo(TensorShape({1, 2, 2, 1}), DataType::Float32));

    TestConnectionPreservation strategy1(testNetwork.GetPNetworkImpl()->GetGraph());
    VisitLayersTopologically(&testNetwork, strategy1);

    armnn::INetworkQuantizerPtr quantizer = armnn::INetworkQuantizer::Create(&testNetwork);

    armnn::TensorInfo tensorInfo = GetInputTensorInfo(&testNetwork);

    std::vector<float> inputData({ 0, 2, 0, 4 });
    armnn::ConstTensor inputTensor(tensorInfo, inputData.data());

    InputTensors inputTensors;
    inputTensors.push_back(std::make_pair(0, inputTensor));
    quantizer->Refine(inputTensors);

    INetworkPtr quantNetwork = quantizer->ExportNetwork();

    TestNetwork* testQuantNetwork = static_cast<TestNetwork*>(quantNetwork.get());

    TestConnectionPreservation strategy2(testQuantNetwork->GetPNetworkImpl()->GetGraph());
    VisitLayersTopologically(quantNetwork.get(), strategy2);
}

BOOST_AUTO_TEST_SUITE_END()
} // namespace armnn