| author | Tracy Narine <tracy.narine@arm.com> | 2023-09-20 14:19:07 +0100 |
|---|---|---|
| committer | Tracy Narine <tracy.narine@arm.com> | 2023-09-28 14:25:16 +0100 |
| commit | 6440ce89abb06e090d2b3cf91bafc14277072475 (patch) | |
| tree | c55682891a0f01f3edbf5dad58720ded7af3fc64 /src/armnn/test/optimizations/AddMulAddTests.cpp | |
| parent | 9a418d850333119e219fb05addc57b56cdc60a7e (diff) | |
| download | armnn-6440ce89abb06e090d2b3cf91bafc14277072475.tar.gz | |
IVGCVSW-7504 Create a backend-specific optimization to fuse Add+Mul+Add+(Activation) in CpuAcc

* Added a CpuAcc backend optimization that fuses Add+Mul+Add (plus an optional
  following Activation) into a single layer
* Tests added/enhanced
* Also added an optional extended parameter to Graph::Print(),
  and throw macros that could be used in place of assert

Signed-off-by: Tracy Narine <tracy.narine@arm.com>
Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Change-Id: I5f8d094b969a130d8c2c7b4da07426313a9fea76
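For orientation, here is an editorial sketch (not part of this commit) of the subgraph shape the optimization matches, built with the same `INetwork` calls the new test below uses. `MakeAddMulAddPattern` and its parameters are hypothetical names introduced for illustration; tensor infos and constants are supplied by the caller. After `Optimize()` targets `Compute::CpuAcc`, the Add/Mul/Add (and optional ReLu) layers are expected to collapse into a single `FusedLayer`, which is exactly what the test's `CheckSequence` assertions verify.

```cpp
#include <armnn/INetwork.hpp>

using namespace armnn;

// Sketch only: builds the Add -> Mul -> Add -> ReLu pattern that the
// CpuAcc backend optimization fuses. Shapes/quantization come from the caller.
INetworkPtr MakeAddMulAddPattern(const TensorInfo& ioInfo,
                                 const ConstTensor& mulConst,
                                 const ConstTensor& addConst)
{
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* input0 = network->AddInputLayer(0);
    IConnectableLayer* input1 = network->AddInputLayer(1);
    IConnectableLayer* add0   = network->AddElementwiseBinaryLayer(BinaryOperation::Add, "add0");
    IConnectableLayer* mulIn  = network->AddConstantLayer(mulConst, "mulInput1");
    IConnectableLayer* mul    = network->AddElementwiseBinaryLayer(BinaryOperation::Mul, "mul");
    IConnectableLayer* addIn  = network->AddConstantLayer(addConst, "addInput1");
    IConnectableLayer* add1   = network->AddElementwiseBinaryLayer(BinaryOperation::Add, "add1");
    IConnectableLayer* relu   = network->AddActivationLayer(ActivationFunction::ReLu, "relu");
    IConnectableLayer* output = network->AddOutputLayer(0);

    // add0 = input0 + input1; mul = add0 * mulInput1; add1 = mul + addInput1; relu(add1)
    input0->GetOutputSlot(0).Connect(add0->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add0->GetInputSlot(1));
    add0->GetOutputSlot(0).Connect(mul->GetInputSlot(0));
    mulIn->GetOutputSlot(0).Connect(mul->GetInputSlot(1));
    mul->GetOutputSlot(0).Connect(add1->GetInputSlot(0));
    addIn->GetOutputSlot(0).Connect(add1->GetInputSlot(1));
    add1->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
    relu->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Every output slot needs a TensorInfo before optimization
    input0->GetOutputSlot(0).SetTensorInfo(ioInfo);
    input1->GetOutputSlot(0).SetTensorInfo(ioInfo);
    add0->GetOutputSlot(0).SetTensorInfo(ioInfo);
    mulIn->GetOutputSlot(0).SetTensorInfo(mulConst.GetInfo());
    mul->GetOutputSlot(0).SetTensorInfo(ioInfo);
    addIn->GetOutputSlot(0).SetTensorInfo(addConst.GetInfo());
    add1->GetOutputSlot(0).SetTensorInfo(ioInfo);
    relu->GetOutputSlot(0).SetTensorInfo(ioInfo);

    return network;
}
```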
Diffstat (limited to 'src/armnn/test/optimizations/AddMulAddTests.cpp')
-rw-r--r-- | src/armnn/test/optimizations/AddMulAddTests.cpp | 311 |
1 file changed, 311 insertions, 0 deletions
diff --git a/src/armnn/test/optimizations/AddMulAddTests.cpp b/src/armnn/test/optimizations/AddMulAddTests.cpp
new file mode 100644
index 0000000000..fababa030a
--- /dev/null
+++ b/src/armnn/test/optimizations/AddMulAddTests.cpp
@@ -0,0 +1,311 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <GraphUtils.hpp>
+#include <TestUtils.hpp>
+#include <ResolveType.hpp>
+#include <armnnUtils/QuantizeHelper.hpp>
+
+#include <armnn/INetwork.hpp>
+
+#include <doctest/doctest.h>
+
+using namespace armnn;
+
+namespace
+{
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+void AddMulAddTest(Compute backendId, bool addOutput, bool addRelu)
+{
+    const TensorInfo input0TensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 10 : 0,
+                                      true);
+    const TensorInfo input1TensorInfo({ 1, 2, 2, 3 },
+                                      ArmnnType,
+                                      IsQuantizedType<T>() ? 0.25f : 1,
+                                      IsQuantizedType<T>() ? 11 : 0,
+                                      true);
+    const TensorInfo mulInput1TensorInfo({ 3 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 12 : 0,
+                                         true);
+    const TensorInfo addInput1TensorInfo({ 3 },
+                                         ArmnnType,
+                                         IsQuantizedType<T>() ? 0.25f : 1,
+                                         IsQuantizedType<T>() ? 13 : 0,
+                                         true);
+    const TensorInfo output0TensorInfo({ 1, 2, 2, 3 },
+                                       ArmnnType,
+                                       IsQuantizedType<T>() ? 0.5f : 1,
+                                       IsQuantizedType<T>() ? 14 : 0);
+    const TensorInfo output1TensorInfo({ 1, 2, 2, 3 },
+                                       ArmnnType,
+                                       IsQuantizedType<T>() ? 0.5f : 1,
+                                       IsQuantizedType<T>() ? 15 : 0);
+
+    std::vector<float> input0Data
+    {
+        0.0f, 0.0f, 0.0f,
+        1.0f, 1.0f, 1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+    std::vector<float> input1Data
+    {
+        0.0f, 0.0f, 0.0f,
+        1.0f, 1.0f, 1.0f,
+        -1.0f, -1.0f, -1.0f,
+        -2.0f, -2.0f, -2.0f
+    };
+    std::vector<float> mulInput1Data
+    {
+        2.0f, 1.0f, 1.0f
+    };
+    std::vector<float> addInput1Data
+    {
+        3.0f, 0.0f, 0.0f
+    };
+    std::vector<float> output0ExpectedData =
+    {
+        0.0f, 0.0f, 0.0f,
+        2.0f, 2.0f, 2.0f,
+        -2.0f, -2.0f, -2.0f,
+        -4.0f, -4.0f, -4.0f
+    };
+    std::vector<float> output1ExpectedData =
+    {
+        3.0f, 0.0f, 0.0f,
+        7.0f, 2.0f, 2.0f,
+        -1.0f, -2.0f, -2.0f,
+        -5.0f, -4.0f, -4.0f
+    };
+    std::vector<float> output1ReluExpectedData =
+    {
+        3.0f, 0.0f, 0.0f,
+        7.0f, 2.0f, 2.0f,
+        0.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 0.0f
+    };
+
+    std::vector<T> input0 = armnnUtils::QuantizedVector<T>(input0Data,
+                                                           input0TensorInfo.GetQuantizationScale(),
+                                                           input0TensorInfo.GetQuantizationOffset());
+    std::vector<T> input1 = armnnUtils::QuantizedVector<T>(input1Data,
+                                                           input1TensorInfo.GetQuantizationScale(),
+                                                           input1TensorInfo.GetQuantizationOffset());
+    std::vector<T> mulInput1 = armnnUtils::QuantizedVector<T>(mulInput1Data,
+                                                              mulInput1TensorInfo.GetQuantizationScale(),
+                                                              mulInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> addInput1 = armnnUtils::QuantizedVector<T>(addInput1Data,
+                                                              addInput1TensorInfo.GetQuantizationScale(),
+                                                              addInput1TensorInfo.GetQuantizationOffset());
+    std::vector<T> output0Expected = armnnUtils::QuantizedVector<T>(output0ExpectedData,
+                                                                    output0TensorInfo.GetQuantizationScale(),
+                                                                    output0TensorInfo.GetQuantizationOffset());
+    std::vector<T> output1Expected = armnnUtils::QuantizedVector<T>(output1ExpectedData,
+                                                                    output1TensorInfo.GetQuantizationScale(),
+                                                                    output1TensorInfo.GetQuantizationOffset());
+    std::vector<T> output1ReluExpected = armnnUtils::QuantizedVector<T>(output1ReluExpectedData,
+                                                                        output1TensorInfo.GetQuantizationScale(),
+                                                                        output1TensorInfo.GetQuantizationOffset());
+
+    std::vector<T> output0Actual(output0TensorInfo.GetNumElements());
+    std::vector<T> output1Actual(output1TensorInfo.GetNumElements());
+
+    // Create a network
+    INetworkPtr network = INetwork::Create();
+
+    IConnectableLayer* const input0Layer = network->AddInputLayer(0);
+    IConnectableLayer* const input1Layer = network->AddInputLayer(1);
+    IConnectableLayer* const add0Layer = network->AddElementwiseBinaryLayer(BinaryOperation::Add, "add0");
+    IConnectableLayer* outputAddLayer = nullptr;
+    if (addOutput)
+    {
+        outputAddLayer = network->AddOutputLayer(0);
+    }
+
+    auto constMulInput1Tensor = armnn::ConstTensor(mulInput1TensorInfo, mulInput1);
+    IConnectableLayer* const mulInput1Layer = network->AddConstantLayer(constMulInput1Tensor, "mulInput1");
+    IConnectableLayer* const mulLayer = network->AddElementwiseBinaryLayer(BinaryOperation::Mul, "mul");
+
+    auto constAddInput1Tensor = armnn::ConstTensor(addInput1TensorInfo, addInput1);
+    IConnectableLayer* const addInput1Layer = network->AddConstantLayer(constAddInput1Tensor, "addInput1");
+    IConnectableLayer* const add1Layer = network->AddElementwiseBinaryLayer(BinaryOperation::Add, "add1");
+    IConnectableLayer* const outputLayer = network->AddOutputLayer(1);
+
+    IConnectableLayer* relu = nullptr;
+    if (addRelu)
+    {
+        relu = network->AddActivationLayer(ActivationFunction::ReLu, "relu");
+    }
+
+    input0Layer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    input1Layer->GetOutputSlot(0).SetTensorInfo(input1TensorInfo);
+    add0Layer->GetOutputSlot(0).SetTensorInfo(output0TensorInfo);
+    mulInput1Layer->GetOutputSlot(0).SetTensorInfo(mulInput1TensorInfo);
+    mulLayer->GetOutputSlot(0).SetTensorInfo(input0TensorInfo);
+    addInput1Layer->GetOutputSlot(0).SetTensorInfo(addInput1TensorInfo);
+    add1Layer->GetOutputSlot(0).SetTensorInfo(output1TensorInfo);
+    if (addRelu)
+    {
+        relu->GetOutputSlot(0).SetTensorInfo(output1TensorInfo);
+    }
+
+    input0Layer->GetOutputSlot(0).Connect(add0Layer->GetInputSlot(0));
+    input1Layer->GetOutputSlot(0).Connect(add0Layer->GetInputSlot(1));
+    if (addOutput)
+    {
+        add0Layer->GetOutputSlot(0).Connect(outputAddLayer->GetInputSlot(0));
+    }
+
+    add0Layer->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(0));
+    mulInput1Layer->GetOutputSlot(0).Connect(mulLayer->GetInputSlot(1));
+    mulLayer->GetOutputSlot(0).Connect(add1Layer->GetInputSlot(0));
+    addInput1Layer->GetOutputSlot(0).Connect(add1Layer->GetInputSlot(1));
+
+    if (addRelu)
+    {
+        add1Layer->GetOutputSlot(0).Connect(relu->GetInputSlot(0));
+        relu->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    }
+    else
+    {
+        add1Layer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
+    }
+
+    // Create ArmNN runtime
+    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions());
+
+    // Optimise ArmNN network
+    IOptimizedNetworkPtr optNet = Optimize(*network, {backendId}, run->GetDeviceSpec());
+
+    Graph& graph = GetGraphForTesting(optNet.get());
+
+    // There are 9 add layers above, so we should be left with 7 layers in the graph after the optimization.
+    // Numbers reduced by 1 in addOutput == false case
+    unsigned int expectedLayerNum = (addOutput) ? 7 : 6;
+    CHECK((graph.GetNumLayers() == expectedLayerNum));
+
+    if (addOutput)
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            &IsLayerOfType<FusedLayer>,
+                            &IsLayerOfType<OutputLayer>,
+                            &IsLayerOfType<OutputLayer>));
+    }
+    else
+    {
+        CHECK(CheckSequence(graph.cbegin(),
+                            graph.cend(),
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<InputLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            &IsLayerOfType<ConstantLayer>,
+                            &IsLayerOfType<FusedLayer>,
+                            &IsLayerOfType<OutputLayer>));
+    }
+
+    // Load network into runtime
+    NetworkId networkIdentifier;
+    run->LoadNetwork(networkIdentifier, std::move(optNet));
+
+    // Create input and output tensors
+    InputTensors inputTensors
+    {
+        {0, armnn::ConstTensor(input0TensorInfo, input0.data())},
+        {1, armnn::ConstTensor(input1TensorInfo, input1.data())}
+    };
+    OutputTensors outputTensors;
+    if (addOutput)
+    {
+        outputTensors.push_back(
+            {0, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), output0Actual.data())});
+        outputTensors.push_back(
+            {1, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 1), output1Actual.data())});
+    }
+    else
+    {
+        outputTensors.push_back(
+            {1, armnn::Tensor(run->GetOutputTensorInfo(networkIdentifier, 1), output1Actual.data())});
+    }
+
+    // Run inference
+    run->EnqueueWorkload(networkIdentifier, inputTensors, outputTensors);
+
+    // Checks the results
+    if (addOutput)
+    {
+        CHECK(output0Actual == output0Expected);
+    }
+
+    if (addRelu)
+    {
+        CHECK(output1Actual == output1ReluExpected);
+    }
+    else
+    {
+        CHECK(output1Actual == output1Expected);
+    }
+
+}
+}
+
+#if defined(ARMCOMPUTENEON_ENABLED)
+TEST_SUITE("Optimizer_AddMulAdd")
+{
+
+TEST_CASE("AddMulAdd2OutputsFloat32Test")
+{
+    AddMulAddTest<DataType::Float32>(Compute::CpuAcc, true, false);
+}
+
+TEST_CASE("AddMulAdd2OutputsInt8Test")
+{
+    AddMulAddTest<DataType::QAsymmS8>(Compute::CpuAcc, true, false);
+}
+
+TEST_CASE("AddMulAdd2OutputsUint8Test")
+{
+    AddMulAddTest<DataType::QAsymmU8>(Compute::CpuAcc, true, false);
+}
+
+TEST_CASE("AddMulAdd1OutputFloat32Test")
+{
+    AddMulAddTest<DataType::Float32>(Compute::CpuAcc, false, false);
+}
+
+TEST_CASE("AddMulAdd1OutputInt8Test")
+{
+    AddMulAddTest<DataType::QAsymmS8>(Compute::CpuAcc, false, false);
+}
+
+TEST_CASE("AddMulAdd1OutputUint8Test")
+{
+    AddMulAddTest<DataType::QAsymmU8>(Compute::CpuAcc, false, false);
+}
+
+//
+// Relu tests
+//
+TEST_CASE("AddMulAddRelu2OutputsFloat32Test")
+{
+    AddMulAddTest<DataType::Float32>(Compute::CpuAcc, true, true);
+}
+
+TEST_CASE("AddMulAddRelu1OutputFloat32Test")
+{
+    AddMulAddTest<DataType::Float32>(Compute::CpuAcc, false, true);
+}
+
+}
+#endif
\ No newline at end of file
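As a sanity check on the expected data in the test above (illustrative plain C++, independent of the Arm NN APIs): the network computes output1 = (input0 + input1) * mulInput1 + addInput1, with the 3-element constants broadcast across each group of 3 channel values, and the ReLu variant clamping negatives to zero. The snippet below reproduces that arithmetic for the test's float data.

```cpp
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <vector>

int main()
{
    // Same float data as the test; input1 equals input0, so add0 doubles each
    // element and matches output0ExpectedData.
    const std::vector<float> input0   { 0, 0, 0,  1, 1, 1,  -1, -1, -1,  -2, -2, -2 };
    const std::vector<float> mulInput1{ 2, 1, 1 };   // broadcast over each group of 3
    const std::vector<float> addInput1{ 3, 0, 0 };

    std::vector<float> out(input0.size());
    for (std::size_t i = 0; i < input0.size(); ++i)
    {
        const float add0 = input0[i] + input0[i];            // add0 = input0 + input1
        out[i] = add0 * mulInput1[i % 3] + addInput1[i % 3]; // mul then add1
    }
    // Matches output1ExpectedData, e.g. (1 + 1) * 2 + 3 = 7 at index 3
    assert(out[3] == 7.0f);
    assert(out[6] == -1.0f);

    // ReLu clamps negatives to zero, matching output1ReluExpectedData
    std::vector<float> reluOut(out.size());
    std::transform(out.begin(), out.end(), reluOut.begin(),
                   [](float v) { return std::max(v, 0.0f); });
    assert(reluOut[6] == 0.0f); // was -1 before ReLu
    return 0;
}
```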