author    Teresa Charlin <teresa.charlinreyes@arm.com>  2024-02-21 14:07:41 +0000
committer Teresa Charlin <teresa.charlinreyes@arm.com>  2024-03-08 15:43:37 +0000
commit    5cc8e447a63d9eb32f0e5ab6c66e755a52fc584d (patch)
tree      29fa33969a978409c8c8d51bad86123a52cb5513
parent    e4ccada2ed349f22cf75c4b8d8d6ba9dc3990694 (diff)
download  armnn-5cc8e447a63d9eb32f0e5ab6c66e755a52fc584d.tar.gz
IVGCVSW-8299 Optimisation to replace Max + Min with Bounded Relu
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I0774a9580710350bd0a959cf68dfe057f52e4330
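For context, the whole optimisation rests on a scalar identity: applying an elementwise Maximum with a constant lower bound and then a Minimum with a constant upper bound is exactly a clamp, which is what the BoundedReLu activation computes. A minimal standalone sketch of that identity (an illustration, not ArmNN code):

#include <algorithm>
#include <cassert>

// The Max + Min subgraph the pass looks for, as a scalar function.
float MaxThenMin(float x, float lower, float upper)
{
    return std::min(std::max(x, lower), upper);
}

// What a BoundedReLu activation computes for the same bounds.
float BoundedReluRef(float x, float lower, float upper)
{
    return std::clamp(x, lower, upper);
}

int main()
{
    for (float x : { -200.0f, -2.0f, 0.0f, 1.0f, 150.0f })
    {
        assert(MaxThenMin(x, -100.0f, 100.0f) == BoundedReluRef(x, -100.0f, 100.0f));
    }
    return 0;
}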
-rw-r--r--  CMakeLists.txt                                                |   1
-rw-r--r--  src/armnn/Network.cpp                                         |   7
-rw-r--r--  src/armnn/optimizations/All.hpp                               |   3
-rw-r--r--  src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp             | 136
-rw-r--r--  src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp   | 224
-rw-r--r--  src/armnnUtils/CompatibleTypes.cpp                            |   8
6 files changed, 375 insertions(+), 4 deletions(-)
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8bf7a40d21..966a273466 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -612,6 +612,7 @@ if(BUILD_UNIT_TESTS)
src/armnn/test/optimizations/Fp32NetworkToFp16ConverterTests.cpp
src/armnn/test/optimizations/FuseActivationTests.cpp
src/armnn/test/optimizations/InsertDebugLayerTests.cpp
+ src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
src/armnn/test/optimizations/MovePermuteUpTests.cpp
src/armnn/test/optimizations/MoveTransposeUpTests.cpp
src/armnn/test/optimizations/OptimizeConsecutiveReshapesTests.cpp
diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp
index 60df27d7fc..1a4fec59ce 100644
--- a/src/armnn/Network.cpp
+++ b/src/armnn/Network.cpp
@@ -1968,8 +1968,12 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
optGraph.InferTensorInfos();
}
- // Perform BroadcastToOptimizationLayer and then AddBroadcastReshapeLayer optimisation
using namespace optimizations;
+ // Substitute Max + Min with BoundedReLu before the AddBroadcastReshapeLayer optimisation,
+ // as BoundedReLu needs the constants to be 1D with size 1
+ Optimizer::Pass(optGraph, MakeOptimizations(MaxMinIntoBoundedRelu()));
+
+ // Perform BroadcastToOptimizationLayer before AddBroadcastReshapeLayer optimisation
Optimizer::Pass(optGraph, MakeOptimizations(BroadcastToOptimizationLayer()));
Optimizer::Pass(optGraph, MakeOptimizations(AddBroadcastReshapeLayer()));
@@ -1980,7 +1984,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph,
optGraph.InferTensorInfos();
}
-
// Group Constant Layer optimizations together where possible.
// This is important as:
// FusePermuteIntoConstantLayer must happen before FoldPadIntoDepthwiseConvolution2d and
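To make the ordering constraint concrete: AddBroadcastReshapeLayer expands lower-rank inputs of elementwise operations by inserting Reshape layers. If it ran first, the scalar bound would reach Max/Min through a Reshape rather than directly from a Constant layer, and the matcher in MaxMinIntoBoundedRelu.hpp below would no longer fire. A sketch of the effect (my illustration, under that assumption):

//  Pass order as patched:              If AddBroadcastReshapeLayer ran first:
//
//  x [1,2,2,3]   Constant [1]          x [1,2,2,3]   Constant [1]
//       \           /                       \             |
//        \         /                         \       Reshape [1,1,1,1]
//         Maximum                             \         /
//                                               Maximum
//
//  GetValue() requires the Max/Min input to be a Constant layer whose tensor
//  has exactly one element; a Reshape in between breaks that direct match.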
diff --git a/src/armnn/optimizations/All.hpp b/src/armnn/optimizations/All.hpp
index abf4cde442..386b6e0c56 100644
--- a/src/armnn/optimizations/All.hpp
+++ b/src/armnn/optimizations/All.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022, 2023 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2018-2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -13,6 +13,7 @@
#include "DeleteBroadcastTo.hpp"
#include "FoldPadIntoLayer2d.hpp"
#include "FuseBatchNorm.hpp"
+#include "MaxMinIntoBoundedRelu.hpp"
#include "MovePermuteUp.hpp"
#include "MoveTransposeUp.hpp"
#include "OptimizeConsecutiveReshapes.hpp"
diff --git a/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
new file mode 100644
index 0000000000..99089f3a3e
--- /dev/null
+++ b/src/armnn/optimizations/MaxMinIntoBoundedRelu.hpp
@@ -0,0 +1,136 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "Optimization.hpp"
+
+namespace armnn::optimizations
+{
+
+class MaxMinIntoBoundedReluImpl
+{
+public:
+ /// Run for every exclusive connection between any Max and Min layers.
+ /// The Max and Min layers, together with their constant inputs, are removed and replaced with a BoundedReLu activation.
+ static void Run(Graph& graph, InputSlot& connection)
+ {
+ Layer& base = connection.GetConnectedOutputSlot()->GetOwningLayer();
+ Layer& child = connection.GetOwningLayer();
+
+ auto& maxLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+ if (maxLayer.GetParameters().m_Operation != BinaryOperation::Maximum)
+ {
+ return;
+ }
+ auto& minLayer = *PolymorphicDowncast<ElementwiseBinaryLayer*>(&child);
+ if (minLayer.GetParameters().m_Operation != BinaryOperation::Minimum)
+ {
+ return;
+ }
+
+ if (maxLayer.GetDataType() != minLayer.GetDataType())
+ {
+ return;
+ }
+
+ // Get the max and min constant values
+ float_t maxValue;
+ if (!GetValue(maxLayer, maxValue))
+ {
+ return;
+ }
+ float_t minValue;
+ if (!GetValue(minLayer, minValue))
+ {
+ return;
+ }
+
+ // Save original parent output slot of the max layer
+ OutputSlot& parentOut = *maxLayer.GetInputSlot(0).GetConnectedOutputSlot();
+
+ // Insert activation layer between max layer and its parent layer
+ ActivationDescriptor boundedReluDescriptor(ActivationFunction::BoundedReLu, minValue, maxValue);
+ const std::string name = std::string("replaced-") + maxLayer.GetName() + std::string("-") + minLayer.GetName()
+ + std::string("-with-BoundedRelu");
+ auto& boundedReluLayer = *graph.InsertNewLayer<ActivationLayer>(maxLayer.GetInputSlot(0),
+ boundedReluDescriptor,
+ name.c_str());
+
+ // Reconnects with original parent.
+ boundedReluLayer.GetOutputSlot().MoveAllConnections(parentOut);
+
+ // Moves connections from the min layer's output to the new BoundedRelu layer.
+ // Min layer will be removed as it's left unconnected.
+ // Max layer will be removed if left unconnected.
+ minLayer.GetOutputSlot().MoveAllConnections(boundedReluLayer.GetOutputSlot());
+ }
+
+protected:
+ MaxMinIntoBoundedReluImpl() = default;
+ ~MaxMinIntoBoundedReluImpl() = default;
+
+private:
+ static float_t GetConstTensorValue(Layer& layer)
+ {
+ auto& constLayer = *PolymorphicDowncast<ConstantLayer*>(&layer);
+ switch (constLayer.GetDataType())
+ {
+ case DataType::Float32:
+ return *constLayer.m_LayerOutput->GetConstTensor<float>();
+ case DataType::BFloat16:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<BFloat16>());
+ case DataType::Float16:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<half_float::half>());
+ case DataType::QAsymmU8:
+ case DataType::Boolean:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<uint8_t>());
+ case DataType::QAsymmS8:
+ case DataType::QSymmS8:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int8_t>());
+ case DataType::QSymmS16:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int16_t>());
+ case DataType::Signed32:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int32_t>());
+ case DataType::Signed64:
+ return static_cast<float_t>(*constLayer.m_LayerOutput->GetConstTensor<int64_t>());
+ default:
+ throw InvalidArgumentException("Unsupported data type");
+ }
+ }
+
+ static bool GetValue(Layer& layer, float_t& value)
+ {
+ Layer& input0 = layer.GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer();
+ Layer& input1 = layer.GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer();
+ if (input0.GetType() == LayerType::Constant)
+ {
+ if (input0.GetOutputSlot(0).GetTensorInfo().GetNumElements() != 1)
+ {
+ return false;
+ }
+ value = GetConstTensorValue(input0);
+ }
+ else if (input1.GetType() == LayerType::Constant)
+ {
+ if (input1.GetOutputSlot(0).GetTensorInfo().GetNumElements() != 1)
+ {
+ return false;
+ }
+ value = GetConstTensorValue(input1);
+ }
+ else
+ {
+ return false;
+ }
+ return true;
+ }
+};
+
+using MaxMinIntoBoundedRelu = OptimizeForExclusiveConnection<ElementwiseBinaryLayer,
+ ElementwiseBinaryLayer,
+ MaxMinIntoBoundedReluImpl>;
+
+} // namespace armnn::optimizations
\ No newline at end of file
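As a reading aid, here is the rewiring that Run() performs, step by step (a sketch of my reading of the code; removal of the dangling layers is left to the optimizer framework):

//  Start:   parent ── Max(const lo) ── Min(const hi) ── consumers
//
//  1. InsertNewLayer places the activation on Max's first input slot:
//           parent ── BoundedReLu ── Max ── Min ── consumers
//  2. boundedReluLayer.GetOutputSlot().MoveAllConnections(parentOut)
//     feeds Max from the original parent again, freeing the activation's output:
//           parent ── Max ── Min ── consumers   (activation output dangling)
//  3. minLayer.GetOutputSlot().MoveAllConnections(boundedReluLayer.GetOutputSlot())
//     hands Min's consumers to the activation:
//           parent ── BoundedReLu ── consumers
//     Max, Min and their constant inputs are now unconnected and are cleaned up.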
diff --git a/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
new file mode 100644
index 0000000000..62f8f08f3b
--- /dev/null
+++ b/src/armnn/test/optimizations/MaxMinIntoBoundedReluTests.cpp
@@ -0,0 +1,224 @@
+//
+// Copyright © 2024 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <TestUtils.hpp>
+#include <ResolveType.hpp>
+#include <armnnUtils/QuantizeHelper.hpp>
+
+#include <armnn/INetwork.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+namespace
+{
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void MulMaxMinTest(Compute backendId, size_t numLayers)
+{
+ const TensorInfo input0TensorInfo({ 1, 2, 2, 3 },
+ ArmnnType,
+ IsQuantizedType<T>() ? 0.25f : 1,
+ IsQuantizedType<T>() ? 10 : 0,
+ true);
+ const TensorInfo input1TensorInfo({ 1, 1, 1, 1 },
+ ArmnnType,
+ IsQuantizedType<T>() ? 0.25f : 1,
+ IsQuantizedType<T>() ? 11 : 0,
+ true);
+ const TensorInfo maxInput1TensorInfo({ 1, 1, 1, 1 },
+ ArmnnType,
+ IsQuantizedType<T>() ? 0.25f : 1,
+ IsQuantizedType<T>() ? 12 : 0,
+ true);
+ const TensorInfo minInput1TensorInfo({ 1, 1, 1, 1 },
+ ArmnnType,
+ IsQuantizedType<T>() ? 0.25f : 1,
+ IsQuantizedType<T>() ? 13 : 0,
+ true);
+ const TensorInfo outputTensorInfo({ 1, 2, 2, 3 },
+ ArmnnType,
+ IsQuantizedType<T>() ? 0.5f : 1,
+ IsQuantizedType<T>() ? 14 : 0);
+
+ std::vector<float> input0Data
+ {
+ 0.0f, 0.0f, 0.0f,
+ 1.0f, 1.0f, 1.0f,
+ -1.0f, -1.0f, -1.0f,
+ -2.0f, -2.0f, -2.0f
+ };
+ std::vector<float> input1Data
+ {
+ 1.0f
+ };
+ std::vector<float> maxInput1Data
+ {
+ -100.0f
+ };
+ std::vector<float> minInput1Data
+ {
+ 100.0f
+ };
+ std::vector<float> outputExpectedData =
+ {
+ 0.0f, 0.0f, 0.0f,
+ 1.0f, 1.0f, 1.0f,
+ -1.0f, -1.0f, -1.0f,
+ -2.0f, -2.0f, -2.0f
+ };
+
+ std::vector<T> input0 = armnnUtils::QuantizedVector<T>(input0Data,
+ input0TensorInfo.GetQuantizationScale(),
+ input0TensorInfo.GetQuantizationOffset());
+ std::vector<T> input1 = armnnUtils::QuantizedVector<T>(input1Data,
+ input1TensorInfo.GetQuantizationScale(),
+ input1TensorInfo.GetQuantizationOffset());
+ std::vector<T> maxInput1 = armnnUtils::QuantizedVector<T>(maxInput1Data,
+ maxInput1TensorInfo.GetQuantizationScale(),
+ maxInput1TensorInfo.GetQuantizationOffset());
+ std::vector<T> minInput1 = armnnUtils::QuantizedVector<T>(minInput1Data,
+ minInput1TensorInfo.GetQuantizationScale(),
+ minInput1TensorInfo.GetQuantizationOffset());
+ std::vector<T> outputExpected = armnnUtils::QuantizedVector<T>(outputExpectedData,
+ outputTensorInfo.GetQuantizationScale(),
+ outputTensorInfo.GetQuantizationOffset());
+ std::vector<T> outputActual(outputTensorInfo.GetNumElements());
+
+ // Create a network
+ INetworkPtr network = INetwork::Create();
+
+ // add layers to network
+ IConnectableLayer* const input0Layer = network->AddInputLayer(0);
+ IConnectableLayer* const input1Layer = network->AddInputLayer(1);
+ IConnectableLayer* const mulLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Mul, "mul");
+
+ auto constMaxInput1Tensor = ConstTensor(maxInput1TensorInfo, maxInput1);
+ IConnectableLayer* const maxInput1Layer = network->AddConstantLayer(constMaxInput1Tensor, "maxInput1");
+ IConnectableLayer* const maxLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Maximum, "max");
+
+ auto constMinInput1Tensor = ConstTensor(minInput1TensorInfo, minInput1);
+ IConnectableLayer* const minInput1Layer = network->AddConstantLayer(constMinInput1Tensor, "minInput1");
+ IConnectableLayer* const minLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Minimum, "min");
+
+ IConnectableLayer* const outputLayer = network->AddOutputLayer(0);
+
+ // set tensor info to output slots
+ input0Layer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+ input1Layer->GetOutputSlot(0).SetTensorInfo(input1TensorInfo);
+ mulLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+ maxInput1Layer->GetOutputSlot(0).SetTensorInfo(maxInput1TensorInfo);
+ maxLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+ minInput1Layer->GetOutputSlot(0).SetTensorInfo(minInput1TensorInfo);
+ minLayer->GetOutputSlot(0).SetTensorInfo(outputTensorInfo);
+
+ // connect layers.
+ // In0 In1
+ // \ /
+ // Mul maxIn1
+ // | /
+ // Max minIn1
+ // | /
+ // Min
+ // |
+ // Out
+ input0Layer ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(0));
+ input1Layer ->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(1));
+ mulLayer ->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(0));
+ maxInput1Layer->GetOutputSlot(0).Connect(maxLayer->GetInputSlot(1));
+ maxLayer ->GetOutputSlot(0).Connect(minLayer->GetInputSlot(0));
+ minInput1Layer->GetOutputSlot(0).Connect(minLayer->GetInputSlot(1));
+ minLayer ->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+
+ // Create ArmNN runtime
+ IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions());
+
+ // Optimise ArmNN network
+ IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec());
+
+ Graph& graph = GetGraphForTesting(optNet.get());
+
+ auto checkMul = [ ](const armnn::Layer* const layer) -> bool
+ {
+ auto* mulLayer = PolymorphicDowncast<const ElementwiseBinaryLayer*>(layer);
+
+ return IsLayerOfType<ElementwiseBinaryLayer>(layer) &&
+ (mulLayer->GetParameters().m_Operation == BinaryOperation::Mul);
+ };
+
+ auto checkBoundedRelu = [ ](const armnn::Layer* const layer) -> bool
+ {
+ auto* activationLayer = PolymorphicDowncast<const ActivationLayer*>(layer);
+
+ return IsLayerOfType<ActivationLayer>(layer) &&
+ (activationLayer->GetParameters().m_Function == ActivationFunction::BoundedReLu);
+ };
+
+ // Expected layers: 2 inputs, mul, activation (as a separate layer on CpuRef and CpuAcc only) and output
+ CHECK((graph.GetNumLayers() == numLayers));
+ if (numLayers == 4)
+ {
+ CHECK(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<InputLayer>,
+ checkMul,
+ &IsLayerOfType<OutputLayer>));
+ }
+ else if (numLayers == 5)
+ {
+ CHECK(CheckSequence(graph.cbegin(),
+ graph.cend(),
+ &IsLayerOfType<InputLayer>,
+ &IsLayerOfType<InputLayer>,
+ checkMul,
+ checkBoundedRelu,
+ &IsLayerOfType<OutputLayer>));
+ }
+
+ // Load network into runtime
+ NetworkId networkIdentifier;
+ run->LoadNetwork(networkIdentifier, std::move(optNet));
+
+ // Create input and output tensors
+ InputTensors inputTensors
+ {
+ {0, ConstTensor(input0TensorInfo, input0.data())},
+ {1, ConstTensor(input1TensorInfo, input1.data())}
+ };
+ OutputTensors outputTensors
+ {
+ {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputActual.data())}
+ };
+
+ // Run inference
+ run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+ // Checks the results
+ CHECK(outputActual == outputExpected);
+}
+}
+
+TEST_SUITE("Optimizer")
+{
+#if defined(ARMNNREF_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuRef")
+{
+ MulMaxMinTest<DataType::Float32>(Compute::CpuRef, 5);
+}
+#endif
+#if defined(ARMCOMPUTENEON_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_CpuAcc")
+{
+ MulMaxMinTest<DataType::Float32>(Compute::CpuAcc, 5);
+}
+#endif
+#if defined(ARMCOMPUTECL_ENABLED)
+TEST_CASE("FuseMulMaxMinTest_Float_GpuAcc")
+{
+ MulMaxMinTest<DataType::Float32>(Compute::GpuAcc, 4);
+}
+#endif
+}
\ No newline at end of file
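A standalone sanity check of the expected values (an illustration, not part of the patch): input1 is 1.0f, so the Mul is an identity, and with bounds of -100 and 100 every product lies strictly inside the clamp range; the expected output therefore equals the input, and the test is really exercising the graph rewrite rather than the clamping arithmetic:

#include <algorithm>
#include <cassert>

int main()
{
    // The distinct values in input0Data; each is multiplied by input1Data's 1.0f.
    for (float x : { 0.0f, 1.0f, -1.0f, -2.0f })
    {
        const float y = std::clamp(x * 1.0f, -100.0f, 100.0f); // Mul, then BoundedReLu
        assert(y == x); // matches outputExpectedData, which mirrors input0Data
    }
    return 0;
}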
diff --git a/src/armnnUtils/CompatibleTypes.cpp b/src/armnnUtils/CompatibleTypes.cpp
index 9a3251d293..726051608d 100644
--- a/src/armnnUtils/CompatibleTypes.cpp
+++ b/src/armnnUtils/CompatibleTypes.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2019-2021, 2024 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <armnn/Types.hpp>
@@ -62,4 +62,10 @@ bool CompatibleTypes<int32_t>(DataType dataType)
return dataType == DataType::Signed32;
}
+template<>
+bool CompatibleTypes<int64_t>(DataType dataType)
+{
+ return dataType == DataType::Signed64;
+}
+
} //namespace armnnUtils
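The new specialization backs the DataType::Signed64 case added to GetConstTensorValue above. A hedged usage sketch (the guard pattern is illustrative; only CompatibleTypes and GetConstTensor appear in the patch):

// Check that a C++ type matches the tensor's armnn::DataType before a typed read.
if (armnnUtils::CompatibleTypes<int64_t>(constLayer.GetDataType()))
{
    const auto value = *constLayer.m_LayerOutput->GetConstTensor<int64_t>();
    // ... e.g. cast 'value' to float for use as a clamp bound ...
}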