diff options
-rw-r--r-- | CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/armnn/Network.cpp | 7 | ||||
-rw-r--r-- | src/armnn/optimizations/All.hpp | 3 | ||||
-rw-r--r-- | src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp | 136 | ||||
-rw-r--r-- | src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp | 224 | ||||
-rw-r--r-- | src/armnnUtils/CompatibleTypes.cpp | 8 |
6 files changed, 375 insertions, 4 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8bf7a40d21..966a273466 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -612,6 +612,7 @@ if(BUILD_UNIT_TESTS) src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp src/armnn/test/optimizations/FuseActivationTests.cpp src/armnn/test/optimizations/InsertDebugLayerTests.cpp + src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp src/armnn/test/optimizations/MovePermuteUpTests.cpp src/armnn/test/optimizations/MoveTransposeUpTests.cpp src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 60df27d7fc..1a4fec59ce 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -1968,8 +1968,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.InferTensorInfos(); } - // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation using namespace optimizations; + // Substitute Max + Min with Bounded Relu before AddBroadcastReshapeLayer optimisation, + // as Bounded ReLu needs the constants to be 1D size 1 + Optimizer::Pass(optGraph, MakeOptimizations(MaxMinIntoBoundedRelu())); + + // Perform BroadcastToOptimizationLayer before AddBroadcastReshapeLayer optimisation Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer())); Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer())); @@ -1980,7 +1984,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.InferTensorInfos(); } - // Group Constant Layer optimizations together where possible. // This is important as: // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp index abf4cde442..386b6e0c56 100644 --- a/src/armnn/optimizations/All.hpp +++ b/src/armnn/optimizations/All.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved. 
+// Copyright © 2018-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -13,6 +13,7 @@
 #include "DeleteBroadcastTo.hpp"
 #include "FoldPadIntoLayer2d.hpp"
 #include "FuseBatchNorm.hpp"
+#include "MaxMinIntoBoundedRelu.hpp"
 #include "MovePermuteUp.hpp"
 #include "MoveTransposeUp.hpp"
 #include "OptimizeConsecutiveReshapes.hpp"
diff --git a/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
new file mode 100644
index 0000000000..99089f3a3e
--- /dev/null
+++ b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
@@ -0,0 +1,162 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn::optimizations
+{
+
+/// Replaces a Maximum layer that feeds a Minimum layer, each with a single-element constant input,
+/// by one BoundedReLu activation layer.
+class MaxMinIntoBoundedReluImpl
+{
+public:
+    /// Run for every exclusive connection between any Max & Min layers.
+    /// A BoundedRelu Activation is attached to the data input of the Max layer and takes over the
+    /// output connections of the Min layer, so the Max and Min layers are left without consumers.
+    ///
+    /// @param graph       Graph the new activation layer is inserted into.
+    /// @param connection  Input slot of the child (Min) layer that is fed by the base (Max) layer.
+    static void Run(Graph& graph, InputSlot& connection)
+    {
+        Layer& base  = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
+
+        auto& maxLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+        if (maxLayer.GetParameters().m_Operation != BinaryOperation::Maximum)
+        {
+            return;
+        }
+        auto& minLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&child);
+        if (minLayer.GetParameters().m_Operation != BinaryOperation::Minimum)
+        {
+            return;
+        }
+
+        if (maxLayer.GetDataType() != minLayer.GetDataType())
+        {
+            return;
+        }
+
+        // Get max and min values, remembering which input slot of each layer holds the constant.
+        // NOTE(review): the stored constant is used as-is; for quantized data types this is presumably
+        // the quantized value rather than the real one - confirm that is what BoundedReLu expects.
+        float maxValue = 0.0f;
+        unsigned int maxConstSlot = 0;
+        if (!GetValue(maxLayer, maxValue, maxConstSlot))
+        {
+            return;
+        }
+        float minValue = 0.0f;
+        unsigned int minConstSlot = 0;
+        if (!GetValue(minLayer, minValue, minConstSlot))
+        {
+            return;
+        }
+
+        // The constant may sit on either input of the Max layer, so the data input is the other slot
+        // and not necessarily slot 0.
+        InputSlot& maxDataInput = maxLayer.GetInputSlot(1u - maxConstSlot);
+
+        // Save original parent output slot of the max layer
+        OutputSlot& parentOut = *maxDataInput.GetConnectedOutputSlot();
+
+        // Insert activation layer between max layer and its parent layer
+        ActivationDescriptor boundedReluDescriptor(ActivationFunction::BoundedReLu, minValue, maxValue);
+        const std::string name = std::string("replaced-") + maxLayer.GetName() + std::string("-") +
+                                 minLayer.GetName() + std::string("-with-BoundedRelu");
+        auto& boundedReluLayer = *graph.InsertNewLayer<ActivationLayer>(maxDataInput,
+                                                                        boundedReluDescriptor,
+                                                                        name.c_str());
+
+        // Reconnects with original parent.
+        boundedReluLayer.GetOutputSlot().MoveAllConnections(parentOut);
+
+        // Moves connections in min layer output to the activation layer.
+        // Min layer will be removed as it's left unconnected.
+        // Max layer will be removed if left unconnected.
+        minLayer.GetOutputSlot().MoveAllConnections(boundedReluLayer.GetOutputSlot());
+    }
+
+protected:
+    MaxMinIntoBoundedReluImpl() = default;
+    ~MaxMinIntoBoundedReluImpl() = default;
+
+private:
+    /// Reads the first element of a Constant layer's tensor and converts it to float.
+    ///
+    /// @param layer  Layer to read from; it must be a ConstantLayer.
+    /// @return The first element of the constant tensor as a float.
+    /// @throws InvalidArgumentException if the layer's data type is not handled below.
+    static float GetConstTensorValue(Layer& layer)
+    {
+        auto& constLayer = *PolymorphicDowncast<ConstantLayer*>(&layer);
+        switch (constLayer.GetDataType())
+        {
+            case DataType::Float32:
+                return *constLayer.m_LayerOutput->GetConstTensor<float>();
+            case DataType::BFloat16:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<BFloat16>());
+            case DataType::Float16:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<half_float::half>());
+            case DataType::QAsymmU8:
+            case DataType::Boolean:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<uint8_t>());
+            case DataType::QAsymmS8:
+            case DataType::QSymmS8:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<int8_t>());
+            case DataType::QSymmS16:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<int16_t>());
+            case DataType::Signed32:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<int32_t>());
+            case DataType::Signed64:
+                return static_cast<float>(*constLayer.m_LayerOutput->GetConstTensor<int64_t>());
+            default:
+                throw InvalidArgumentException("No supported Data Type");
+        }
+    }
+
+    /// Looks for a single-element Constant layer feeding one of the two inputs of the given layer.
+    /// Input 0 is inspected first; input 1 is only considered when input 0 is not a Constant layer.
+    ///
+    /// @param layer      Binary layer whose two inputs are inspected.
+    /// @param value      Receives the constant converted to float on success.
+    /// @param constSlot  Receives the index (0 or 1) of the input slot fed by the constant on success.
+    /// @return true if a single-element constant was found, false otherwise.
+    static bool GetValue(Layer& layer, float& value, unsigned int& constSlot)
+    {
+        for (unsigned int slot = 0; slot < 2; ++slot)
+        {
+            OutputSlot* const source = layer.GetInputSlot(slot).GetConnectedOutputSlot();
+            if (source == nullptr)
+            {
+                // Unconnected input: there is nothing to fuse.
+                return false;
+            }
+            Layer& producer = source->GetOwningLayer();
+            if (producer.GetType() != LayerType::Constant)
+            {
+                continue;
+            }
+            // The first constant input decides: only a single-element constant can become a bound.
+            if (producer.GetOutputSlot(0).GetTensorInfo().GetNumElements() != 1)
+            {
+                return false;
+            }
+            value = GetConstTensorValue(producer);
+            constSlot = slot;
+            return true;
+        }
+        return false;
+    }
+};
+
+using MaxMinIntoBoundedRelu = OptimizeForExclusiveConnection<ElementwiseBinaryLayer,
+                                                             ElementwiseBinaryLayer,
+                                                             MaxMinIntoBoundedReluImpl>;
+
+} // namespace armnn::optimizations
\ No newline at end of file
diff --git a/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
new file mode 100644
index 0000000000..62f8f08f3b
--- /dev/null
+++ b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
@@ -0,0 +1,236 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <TestUtils.hpp>
+#include <ResolveType.hpp>
+#include <armnnUtils/QuantizeHelper.hpp>
+
+#include <armnn/INetwork.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+namespace
+{
+/// Builds In0 * In1 -> Max(const) -> Min(const) -> Out, optimises it for the given backend and checks
+/// both the resulting layer sequence and the inference output.
+///
+/// @param backendId  Backend the network is optimised for and executed on.
+/// @param numLayers  Expected layer count after optimisation: 5 when the graph holds a BoundedReLu
+///                   activation after the Mul layer, 4 when only inputs, Mul and output remain.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void MulMaxMinTest(Compute backendId, size_t numLayers)
+{
+    const TensorInfo input0TensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 10 : 0,
+                                      true);
+    const TensorInfo input1TensorInfo({ 1, 1, 1, 1 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 11 : 0,
+                                      true);
+    const TensorInfo maxInput1TensorInfo({ 1, 1, 1, 1 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 12 : 0,
+                                         true);
+    const TensorInfo minInput1TensorInfo({ 1, 1, 1, 1 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 13 : 0,
+                                         true);
+    const TensorInfo outputTensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.5f : 1,
+                                      IsQuantizedType<T>() ? 14 : 0);
+
+    std::vector<float> input0Data
+    {
+         0.0f,  0.0f,  0.0f,
+         1.0f,  1.0f,  1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+    std::vector<float> input1Data
+    {
+        1.0f
+    };
+    std::vector<float> maxInput1Data
+    {
+        -100.0f
+    };
+    std::vector<float> minInput1Data
+    {
+        100.0f
+    };
+    std::vector<float> outputExpectedData =
+    {
+         0.0f,  0.0f,  0.0f,
+         1.0f,  1.0f,  1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+
+    std::vector<T> input0 = armnnUtils::QuantizedVector<T>(input0Data,
+                                                           input0TensorInfo.GetQuantizationScale(),
+                                                           input0TensorInfo.GetQuantizationOffset());
+    std::vector<T> input1 = armnnUtils::QuantizedVector<T>(input1Data,
+                                                           input1TensorInfo.GetQuantizationScale(),
+                                                           input1TensorInfo.GetQuantizationOffset());
+    std::vector<T> maxInput1 = armnnUtils::QuantizedVector<T>(maxInput1Data,
+                                                              maxInput1TensorInfo.GetQuantizationScale(),
+                                                              maxInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> minInput1 = armnnUtils::QuantizedVector<T>(minInput1Data,
+                                                              minInput1TensorInfo.GetQuantizationScale(),
+                                                              minInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>(outputExpectedData,
+                                                                   outputTensorInfo.GetQuantizationScale(),
+                                                                   outputTensorInfo.GetQuantizationOffset());
+    std::vector<T> outputActual(outputTensorInfo.GetNumElements());
+
+    // Create a network
+    INetworkPtr network = INetwork::Create();
+
+    // add layers to network
+    IConnectableLayer* const input0Layer = network->AddInputLayer(0);
+    IConnectableLayer* const input1Layer = network->AddInputLayer(1);
+    IConnectableLayer* const mulLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Mul, "mul");
+
+    auto constMaxInput1Tensor = ConstTensor(maxInput1TensorInfo, maxInput1);
+    IConnectableLayer* const maxInput1Layer = network->AddConstantLayer(constMaxInput1Tensor, "maxInput1");
+    IConnectableLayer* const maxLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Maximum, "max");
+
+    auto constMinInput1Tensor = ConstTensor(minInput1TensorInfo, minInput1);
+    IConnectableLayer* const minInput1Layer = network->AddConstantLayer(constMinInput1Tensor, "minInput1");
+    IConnectableLayer* const minLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Minimum, "min");
+
+    IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    // set tensor info to output slots
+    input0Layer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    input1Layer->GetOutputSlot(0).SetTensorInfo(input1TensorInfo);
+    mulLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    maxInput1Layer->GetOutputSlot(0).SetTensorInfo(maxInput1TensorInfo);
+    maxLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    minInput1Layer->GetOutputSlot(0).SetTensorInfo(minInput1TensorInfo);
+    minLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // connect layers.
+    //    In0    In1
+    //      \    /
+    //       Mul    maxIn1
+    //        |    /
+    //       Max    minIn1
+    //        |    /
+    //       Min
+    //        |
+    //       Out
+    input0Layer   ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(0));
+    input1Layer   ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(1));
+    mulLayer      ->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(0));
+    maxInput1Layer->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(1));
+    maxLayer      ->GetOutputSlot(0).Connect(minLayer->GetInputSlot(0));
+    minInput1Layer->GetOutputSlot(0).Connect(minLayer->GetInputSlot(1));
+    minLayer      ->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    // Create ArmNN runtime
+    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions());
+
+    // Optimise ArmNN network
+    IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec());
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    auto checkMul = [ ](const armnn::Layer* const layer) -> bool
+    {
+        // Check the type before downcasting, so an unexpected layer fails the CHECK below cleanly.
+        if (!IsLayerOfType<ElementwiseBinaryLayer>(layer))
+        {
+            return false;
+        }
+        auto* binaryLayer = PolymorphicDowncast<const ElementwiseBinaryLayer*>(layer);
+        return binaryLayer->GetParameters().m_Operation == BinaryOperation::Mul;
+    };
+
+    auto checkBoundedRelu = [ ](const armnn::Layer* const layer) -> bool
+    {
+        // Check the type before downcasting, so an unexpected layer fails the CHECK below cleanly.
+        if (!IsLayerOfType<ActivationLayer>(layer))
+        {
+            return false;
+        }
+        auto* activationLayer = PolymorphicDowncast<const ActivationLayer*>(layer);
+        return activationLayer->GetParameters().m_Function == ActivationFunction::BoundedReLu;
+    };
+
+    // 2 inputs, mul, activation(in CpuRef and CpuAcc), output
+    CHECK((graph.GetNumLayers() == numLayers));
+    if (numLayers == 4)
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            checkMul,
+                            &IsLayerOfType<OutputLayer>));
+    }
+    else if (numLayers == 5)
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            checkMul,
+                            checkBoundedRelu,
+                            &IsLayerOfType<OutputLayer>));
+    }
+
+    // Load network into runtime
+    NetworkId networkIdentifier;
+    run->LoadNetwork(networkIdentifier, std::move(optNet));
+
+    // Create input and output tensors
+    InputTensors inputTensors
+    {
+        {0, ConstTensor(input0TensorInfo, input0.data())},
+        {1, ConstTensor(input1TensorInfo, input1.data())}
+    };
+    OutputTensors outputTensors
+    {
+        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputActual.data())}
+    };
+
+    // Run inference
+    run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+    // Checks the results
+    CHECK(outputActual == outputExpected);
+}
+}
+
+TEST_SUITE("Optimizer")
+{
+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuRef")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::CpuRef, 5);
+}
+#endif
+#if defined(ARMCOMPUTENEON_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuAcc")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::CpuAcc, 5);
+}
+#endif
+#if defined(ARMCOMPUTECL_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_GpuAcc")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::GpuAcc, 4);
+}
+#endif
+}
\ No newline at end of file
diff --git a/src/armnnUtils/CompatibleTypes.cpp b/src/armnnUtils/CompatibleTypes.cpp
index 9a3251d293..726051608d 100644
--- a/src/armnnUtils/CompatibleTypes.cpp
+++ b/src/armnnUtils/CompatibleTypes.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2021, 2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include <armnn/Types.hpp>
@@ -62,4 +62,11 @@ bool CompatibleTypes<int32_t>(DataType dataType)
     return dataType == DataType::Signed32;
 }
 
+/// int64_t storage is compatible with DataType::Signed64 only.
+template<>
+bool CompatibleTypes<int64_t>(DataType dataType)
+{
+    return dataType == DataType::Signed64;
+}
+
 } //namespace armnnUtils