From 5cc8e447a63d9eb32f0e5ab6c66e755a52fc584d Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Wed, 21 Feb 2024 14:07:41 +0000
Subject: IVGCVSW-8299 Optimisation to replace Max + Min with Bounded Relu

Signed-off-by: Teresa Charlin
Change-Id: I0774a9580710350bd0a959cf68dfe057f52e4330
---
 CMakeLists.txt                                     |   1 +
 src/armnn/Network.cpp                              |   7 +-
 src/armnn/optimizations/All.hpp                    |   3 +-
 src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp  | 136 +++++++++++++
 .../optimizations/MaxMinIntoBoundedReluTests.cpp   | 224 +++++++++++++++++++++
 src/armnnUtils/CompatibleTypes.cpp                 |   8 +-
 6 files changed, 375 insertions(+), 4 deletions(-)
 create mode 100644 src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
 create mode 100644 src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8bf7a40d21..966a273466 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -612,6 +612,7 @@ if(BUILD_UNIT_TESTS)
     src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
    src/armnn/test/optimizations/FuseActivationTests.cpp
    src/armnn/test/optimizations/InsertDebugLayerTests.cpp
+    src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
    src/armnn/test/optimizations/MovePermuteUpTests.cpp
    src/armnn/test/optimizations/MoveTransposeUpTests.cpp
    src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 60df27d7fc..1a4fec59ce 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1968,8 +1968,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         optGraph.InferTensorInfos();
     }
 
-    // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation
     using namespace optimizations;
+    // Substitute Max + Min with a Bounded ReLU before the AddBroadcastReshapeLayer optimisation,
+    // as Bounded ReLU needs its clamp constants to be 1D tensors of size 1
+    Optimizer::Pass(optGraph, MakeOptimizations(MaxMinIntoBoundedRelu()));
+
+    // Perform BroadcastToOptimizationLayer before the AddBroadcastReshapeLayer optimisation
     Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
     Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
 
@@ -1980,7 +1984,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
         optGraph.InferTensorInfos();
     }
 
-
     // Group Constant Layer optimizations together where possible.
     // This is important as:
     // FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index abf4cde442..386b6e0c56 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2018-2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once
@@ -13,6 +13,7 @@
 #include "DeleteBroadcastTo.hpp"
 #include "FoldPadIntoLayer2d.hpp"
 #include "FuseBatchNorm.hpp"
+#include "MaxMinIntoBoundedRelu.hpp"
 #include "MovePermuteUp.hpp"
 #include "MoveTransposeUp.hpp"
 #include "OptimizeConsecutiveReshapes.hpp"
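
The substitution introduced below rests on the identity min(max(x, lo), hi) == BoundedReLU(x), where hi is the upper clamp (m_A) and lo the lower clamp (m_B). A minimal standalone sketch of that equivalence (illustration only, not part of this patch; the BoundedRelu helper is hypothetical):

    #include <algorithm>
    #include <cassert>

    // Bounded ReLU: y = min(a, max(b, x)), with a the upper clamp (m_A)
    // and b the lower clamp (m_B).
    float BoundedRelu(float x, float a, float b)
    {
        return std::min(a, std::max(b, x));
    }

    int main()
    {
        const float lo = -1.0f; // constant fed to the Maximum layer
        const float hi =  6.0f; // constant fed to the Minimum layer
        for (float x : { -3.0f, 0.0f, 2.0f, 9.0f })
        {
            assert(std::min(std::max(x, lo), hi) == BoundedRelu(x, hi, lo));
        }
        return 0;
    }

This also explains the argument order used by the new pass: the Min layer's constant becomes m_A of the ActivationDescriptor and the Max layer's constant becomes m_B.
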
diff --git a/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
new file mode 100644
index 0000000000..99089f3a3e
--- /dev/null
+++ b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
@@ -0,0 +1,136 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn::optimizations
+{
+
+class MaxMinIntoBoundedReluImpl
+{
+public:
+    /// Runs for every exclusive connection between a Max layer and a Min layer.
+    /// The Max and Min layers, together with their associated constant inputs,
+    /// are removed and replaced with a BoundedReLU Activation layer.
+    static void Run(Graph& graph, InputSlot& connection)
+    {
+        Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& child = connection.GetOwningLayer();
+
+        auto& maxLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+        if (maxLayer.GetParameters().m_Operation != BinaryOperation::Maximum)
+        {
+            return;
+        }
+        auto& minLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&child);
+        if (minLayer.GetParameters().m_Operation != BinaryOperation::Minimum)
+        {
+            return;
+        }
+
+        if (maxLayer.GetDataType() != minLayer.GetDataType())
+        {
+            return;
+        }
+
+        // Get the max and min values from the constant inputs
+        float_t maxValue;
+        if (!GetValue(maxLayer, maxValue))
+        {
+            return;
+        }
+        float_t minValue;
+        if (!GetValue(minLayer, minValue))
+        {
+            return;
+        }
+
+        // Save the original parent output slot of the Max layer
+        OutputSlot& parentOut = *maxLayer.GetInputSlot(0).GetConnectedOutputSlot();
+
+        // Insert an Activation layer between the Max layer and its parent layer
+        ActivationDescriptor boundedReluDescriptor(ActivationFunction::BoundedReLu, minValue, maxValue);
+        const std::string name = std::string("replaced-") + maxLayer.GetName() + std::string("-") +
+                                 minLayer.GetName() + std::string("-with-BoundedRelu");
+        auto& boundedReluLayer = *graph.InsertNewLayer<ActivationLayer>(maxLayer.GetInputSlot(0),
+                                                                        boundedReluDescriptor,
+                                                                        name.c_str());
+
+        // Reconnect with the original parent
+        boundedReluLayer.GetOutputSlot().MoveAllConnections(parentOut);
+
+        // Move the connections on the Min layer's output to the new Activation layer.
+        // The Min layer is then removed as it is left unconnected;
+        // the Max layer is removed likewise once unconnected.
+        minLayer.GetOutputSlot().MoveAllConnections(boundedReluLayer.GetOutputSlot());
+    }
+
+protected:
+    MaxMinIntoBoundedReluImpl() = default;
+    ~MaxMinIntoBoundedReluImpl() = default;
+
+private:
+    static float_t GetConstTensorValue(Layer& layer)
+    {
+        auto& constLayer = *PolymorphicDowncast<ConstantLayer*>(&layer);
+        switch (constLayer.GetDataType())
+        {
+            case DataType::Float32:
+                return *constLayer.m_LayerOutput->GetConstTensor<float>();
+            case DataType::BFloat16:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<BFloat16>());
+            case DataType::Float16:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<half_float::half>());
+            case DataType::QAsymmU8:
+            case DataType::Boolean:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<uint8_t>());
+            case DataType::QAsymmS8:
+            case DataType::QSymmS8:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int8_t>());
+            case DataType::QSymmS16:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int16_t>());
+            case DataType::Signed32:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int32_t>());
+            case DataType::Signed64:
+                return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int64_t>());
+            default:
+                throw InvalidArgumentException("No supported Data Type");
+        }
+    }
+
+    static bool GetValue(Layer& layer, float_t& value)
+    {
+        Layer& input0 = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+        Layer& input1 = layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+        if (input0.GetType() == LayerType::Constant)
+        {
+            // Only a single-element constant can be folded into an activation bound
+            if (input0.GetOutputSlot(0).GetTensorInfo().GetNumElements() != 1)
+            {
+                return false;
+            }
+            value = GetConstTensorValue(input0);
+        }
+        else if (input1.GetType() == LayerType::Constant)
+        {
+            if (input1.GetOutputSlot(0).GetTensorInfo().GetNumElements() != 1)
+            {
+                return false;
+            }
+            value = GetConstTensorValue(input1);
+        }
+        else
+        {
+            return false;
+        }
+        return true;
+    }
+};
+
+using MaxMinIntoBoundedRelu = OptimizeForExclusiveConnection<ElementwiseBinaryLayer,
+                                                             ElementwiseBinaryLayer,
+                                                             MaxMinIntoBoundedReluImpl>;
+
+} // namespace armnn::optimizations
\ No newline at end of file
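
As wired into Network.cpp above, the pass runs through Arm NN's generic optimizer driver: OptimizeForExclusiveConnection matches every Max -> Min pair in which the Min layer is the sole consumer of the Max layer's output, then hands the connection to MaxMinIntoBoundedReluImpl::Run. A minimal invocation sketch (assuming Arm NN's internal Optimizer.hpp header path and a graph that has already had its tensor infos inferred):

    #include "Optimizer.hpp"
    #include "optimizations/MaxMinIntoBoundedRelu.hpp"

    // Run just the Max + Min fusion over a graph, mirroring the call
    // added to Optimize() in Network.cpp above.
    void RunMaxMinFusion(armnn::Graph& graph)
    {
        using namespace armnn::optimizations;
        armnn::Optimizer::Pass(graph, armnn::MakeOptimizations(MaxMinIntoBoundedRelu()));
    }
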
diff --git a/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
new file mode 100644
index 0000000000..62f8f08f3b
--- /dev/null
+++ b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
@@ -0,0 +1,224 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <TestUtils.hpp>
+#include <ResolveType.hpp>
+#include <QuantizeHelper.hpp>
+
+#include <armnn/INetwork.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+namespace
+{
+template <DataType ArmnnType, typename T = ResolveType<ArmnnType>>
+void MulMaxMinTest(Compute backendId, size_t numLayers)
+{
+    const TensorInfo input0TensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 10 : 0,
+                                      true);
+    const TensorInfo input1TensorInfo({ 1, 1, 1, 1 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 11 : 0,
+                                      true);
+    const TensorInfo maxInput1TensorInfo({ 1, 1, 1, 1 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 12 : 0,
+                                         true);
+    const TensorInfo minInput1TensorInfo({ 1, 1, 1, 1 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 13 : 0,
+                                         true);
+    const TensorInfo outputTensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.5f : 1,
+                                      IsQuantizedType<T>() ? 14 : 0);
+
+    std::vector<float> input0Data
+    {
+         0.0f,  0.0f,  0.0f,
+         1.0f,  1.0f,  1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+    std::vector<float> input1Data
+    {
+        1.0f
+    };
+    std::vector<float> maxInput1Data
+    {
+        -100.0f
+    };
+    std::vector<float> minInput1Data
+    {
+        100.0f
+    };
+    std::vector<float> outputExpectedData =
+    {
+         0.0f,  0.0f,  0.0f,
+         1.0f,  1.0f,  1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+
+    std::vector<T> input0 = armnnUtils::QuantizedVector<T>(input0Data,
+                                                           input0TensorInfo.GetQuantizationScale(),
+                                                           input0TensorInfo.GetQuantizationOffset());
+    std::vector<T> input1 = armnnUtils::QuantizedVector<T>(input1Data,
+                                                           input1TensorInfo.GetQuantizationScale(),
+                                                           input1TensorInfo.GetQuantizationOffset());
+    std::vector<T> maxInput1 = armnnUtils::QuantizedVector<T>(maxInput1Data,
+                                                              maxInput1TensorInfo.GetQuantizationScale(),
+                                                              maxInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> minInput1 = armnnUtils::QuantizedVector<T>(minInput1Data,
+                                                              minInput1TensorInfo.GetQuantizationScale(),
+                                                              minInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>(outputExpectedData,
+                                                                   outputTensorInfo.GetQuantizationScale(),
+                                                                   outputTensorInfo.GetQuantizationOffset());
+    std::vector<T> outputActual(outputTensorInfo.GetNumElements());
+
+    // Create a network
+    INetworkPtr network = INetwork::Create();
+
+    // Add layers to the network
+    IConnectableLayer* const input0Layer = network->AddInputLayer(0);
+    IConnectableLayer* const input1Layer = network->AddInputLayer(1);
+    IConnectableLayer* const mulLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Mul, "mul");
+
+    auto constMaxInput1Tensor = ConstTensor(maxInput1TensorInfo, maxInput1);
+    IConnectableLayer* const maxInput1Layer = network->AddConstantLayer(constMaxInput1Tensor, "maxInput1");
+    IConnectableLayer* const maxLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Maximum, "max");
+
+    auto constMinInput1Tensor = ConstTensor(minInput1TensorInfo, minInput1);
+    IConnectableLayer* const minInput1Layer = network->AddConstantLayer(constMinInput1Tensor, "minInput1");
+    IConnectableLayer* const minLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Minimum, "min");
+
+    IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+    // Set tensor infos on the output slots
+    input0Layer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    input1Layer->GetOutputSlot(0).SetTensorInfo(input1TensorInfo);
+    mulLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    maxInput1Layer->GetOutputSlot(0).SetTensorInfo(maxInput1TensorInfo);
+    maxLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    minInput1Layer->GetOutputSlot(0).SetTensorInfo(minInput1TensorInfo);
+    minLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+    // Connect the layers:
+    //   In0    In1
+    //     \    /
+    //      Mul    maxIn1
+    //       |    /
+    //       Max    minIn1
+    //        |    /
+    //        Min
+    //         |
+    //        Out
+    input0Layer   ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(0));
+    input1Layer   ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(1));
+    mulLayer      ->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(0));
+    maxInput1Layer->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(1));
+    maxLayer      ->GetOutputSlot(0).Connect(minLayer->GetInputSlot(0));
+    minInput1Layer->GetOutputSlot(0).Connect(minLayer->GetInputSlot(1));
+    minLayer      ->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+    // Create the Arm NN runtime
+    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions());
+
+    // Optimise the Arm NN network
+    IOptimizedNetworkPtr optNet = Optimize(*network, { backendId }, run->GetDeviceSpec());
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    auto checkMul = [](const Layer* const layer) -> bool
+    {
+        auto* mulLayer = PolymorphicDowncast<const ElementwiseBinaryLayer*>(layer);
+        return IsLayerOfType<ElementwiseBinaryLayer>(layer) &&
+               (mulLayer->GetParameters().m_Operation == BinaryOperation::Mul);
+    };
+
+    auto checkBoundedRelu = [](const Layer* const layer) -> bool
+    {
+        auto* activationLayer = PolymorphicDowncast<const ActivationLayer*>(layer);
+        return IsLayerOfType<ActivationLayer>(layer) &&
+               (activationLayer->GetParameters().m_Function == ActivationFunction::BoundedReLu);
+    };
+
+    // 2 inputs, Mul, Activation (present on CpuRef and CpuAcc), output
+    CHECK((graph.GetNumLayers() == numLayers));
+    if (numLayers == 4)
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            checkMul,
+                            &IsLayerOfType<OutputLayer>));
+    }
+    else if (numLayers == 5)
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            checkMul,
+                            checkBoundedRelu,
+                            &IsLayerOfType<OutputLayer>));
+    }
+
+    // Load the network into the runtime
+    NetworkId networkIdentifier;
+    run->LoadNetwork(networkIdentifier, std::move(optNet));
+
+    // Create input and output tensors
+    InputTensors inputTensors
+    {
+        { 0, ConstTensor(input0TensorInfo, input0.data()) },
+        { 1, ConstTensor(input1TensorInfo, input1.data()) }
+    };
+    OutputTensors outputTensors
+    {
+        { 0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputActual.data()) }
+    };
+
+    // Run inference
+    run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+    // Check the results
+    CHECK(outputActual == outputExpected);
+}
+} // anonymous namespace
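
The quantized variants of this helper lean on armnnUtils::QuantizedVector. Assuming it applies the usual affine mapping q = round(v / scale) + offset (an assumption about its internals, not shown in this patch), the scales and offsets above are easy to check by hand, e.g. 1.0f at scale 0.25 and offset 10 quantizes to 14:

    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Hypothetical stand-in for armnnUtils::QuantizedVector
    // (range clamping omitted for brevity).
    std::vector<int8_t> QuantizeSketch(const std::vector<float>& values, float scale, int32_t offset)
    {
        std::vector<int8_t> quantized;
        quantized.reserve(values.size());
        for (float v : values)
        {
            // e.g. v = 1.0f, scale = 0.25f, offset = 10  ->  4 + 10 = 14
            quantized.push_back(static_cast<int8_t>(std::lround(v / scale) + offset));
        }
        return quantized;
    }
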
+
+TEST_SUITE("Optimizer")
+{
+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuRef")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::CpuRef, 5);
+}
+#endif
+#if defined(ARMCOMPUTENEON_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuAcc")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::CpuAcc, 5);
+}
+#endif
+#if defined(ARMCOMPUTECL_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_GpuAcc")
+{
+    MulMaxMinTest<DataType::Float32>(Compute::GpuAcc, 4);
+}
+#endif
+}
\ No newline at end of file
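
The CompatibleTypes change below backs the Signed64 branch added in GetConstTensorValue: reading a constant through GetConstTensor<int64_t>() presumably has to pass a CompatibleTypes<int64_t> check against the tensor's DataType (an assumption about GetConstTensor's implementation; the call site is not shown in this patch). A self-contained sketch of that kind of type gate:

    #include <cstdint>

    enum class DataType { Signed32, Signed64 };

    // Hypothetical stand-in for armnnUtils::CompatibleTypes<T>: each C++
    // storage type is accepted only for the matching Arm NN DataType.
    template <typename T> bool CompatibleTypesSketch(DataType);

    template <> bool CompatibleTypesSketch<int32_t>(DataType dt) { return dt == DataType::Signed32; }
    template <> bool CompatibleTypesSketch<int64_t>(DataType dt) { return dt == DataType::Signed64; }
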
diff --git a/src/armnnUtils/CompatibleTypes.cpp b/src/armnnUtils/CompatibleTypes.cpp
index 9a3251d293..726051608d 100644
--- a/src/armnnUtils/CompatibleTypes.cpp
+++ b/src/armnnUtils/CompatibleTypes.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2021, 2024 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #include <armnnUtils/CompatibleTypes.hpp>
@@ -62,4 +62,10 @@ bool CompatibleTypes<int32_t>(DataType dataType)
     return dataType == DataType::Signed32;
 }
 
+template<>
+bool CompatibleTypes<int64_t>(DataType dataType)
+{
+    return dataType == DataType::Signed64;
+}
+
 } //namespace armnnUtils
-- 
cgit v1.2.1
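
With the clamp constants used in the test (-100 for Max, 100 for Min) and a multiplier of 1, the expected output equals the input, so the test exercises the rewrite without changing any values. A standalone check of that arithmetic (illustration only, not part of the patch):

    #include <algorithm>
    #include <cassert>
    #include <vector>

    int main()
    {
        const std::vector<float> input { 0.0f, 1.0f, -1.0f, -2.0f };
        for (float x : input)
        {
            const float mul   = x * 1.0f;                                  // Mul with input1 = 1
            const float fused = std::min(100.0f, std::max(-100.0f, mul));  // BoundedReLU, m_A = 100, m_B = -100
            assert(fused == x);
        }
        return 0;
    }
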