From a2747487fbe7eb6d9f5357c6d16c32355ed6e01c Mon Sep 17 00:00:00 2001 From: Sadik Armagan Date: Tue, 9 Feb 2021 10:28:54 +0000 Subject: MLCE-347 'REDUCE_MIN, REDUCE_MAX, REDUCE_SUM Support' * Added TfLiteParser support for REDUCE_MIN and REDUCE_MAX operators * Added ACL workloads support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators * Added TfLite Delegate support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators Signed-off-by: Sadik Armagan Change-Id: I8085d59946bfd4ab78a59a61f899031ae53371a8 --- CMakeLists.txt | 3 +- delegate/CMakeLists.txt | 3 + delegate/src/Reduce.hpp | 133 ++++++++ delegate/src/armnn_delegate.cpp | 19 ++ delegate/src/test/ReduceTest.cpp | 354 +++++++++++++++++++++ delegate/src/test/ReduceTestHelper.hpp | 186 +++++++++++ include/armnn/BackendHelper.hpp | 5 + src/armnn/BackendHelper.cpp | 8 + src/armnnTfLiteParser/TfLiteParser.cpp | 28 +- src/armnnTfLiteParser/TfLiteParser.hpp | 3 + src/armnnTfLiteParser/test/Reduce.cpp | 193 +++++++++++ src/backends/aclCommon/ArmComputeUtils.hpp | 12 + src/backends/backendsCommon/common.mk | 1 + src/backends/backendsCommon/test/CMakeLists.txt | 2 + src/backends/backendsCommon/test/LayerTests.hpp | 1 + .../test/layerTests/ReduceSumTestImpl.cpp | 7 +- .../test/layerTests/ReductionTestImpl.cpp | 315 ++++++++++++++++++ .../test/layerTests/ReductionTestImpl.hpp | 43 +++ src/backends/cl/ClLayerSupport.cpp | 13 + src/backends/cl/ClLayerSupport.hpp | 5 + src/backends/cl/ClWorkloadFactory.cpp | 6 + src/backends/cl/ClWorkloadFactory.hpp | 3 + src/backends/cl/backend.mk | 1 + src/backends/cl/test/ClLayerTests.cpp | 15 + src/backends/cl/workloads/CMakeLists.txt | 2 + src/backends/cl/workloads/ClReduceWorkload.cpp | 66 ++++ src/backends/cl/workloads/ClReduceWorkload.hpp | 30 ++ src/backends/cl/workloads/ClWorkloads.hpp | 1 + src/backends/neon/NeonLayerSupport.cpp | 13 + src/backends/neon/NeonLayerSupport.hpp | 5 + src/backends/neon/NeonWorkloadFactory.cpp | 6 + src/backends/neon/NeonWorkloadFactory.hpp | 3 + 
src/backends/neon/backend.mk | 1 + src/backends/neon/test/NeonLayerTests.cpp | 15 + src/backends/neon/workloads/CMakeLists.txt | 2 + src/backends/neon/workloads/NeonReduceWorkload.cpp | 66 ++++ src/backends/neon/workloads/NeonReduceWorkload.hpp | 30 ++ src/backends/neon/workloads/NeonWorkloads.hpp | 1 + src/backends/reference/test/RefLayerTests.cpp | 9 + src/backends/reference/workloads/Reduce.cpp | 78 +++-- 40 files changed, 1650 insertions(+), 37 deletions(-) create mode 100644 delegate/src/Reduce.hpp create mode 100644 delegate/src/test/ReduceTest.cpp create mode 100644 delegate/src/test/ReduceTestHelper.hpp create mode 100644 src/armnnTfLiteParser/test/Reduce.cpp create mode 100644 src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp create mode 100644 src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp create mode 100644 src/backends/cl/workloads/ClReduceWorkload.cpp create mode 100644 src/backends/cl/workloads/ClReduceWorkload.hpp create mode 100644 src/backends/neon/workloads/NeonReduceWorkload.cpp create mode 100644 src/backends/neon/workloads/NeonReduceWorkload.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a71de7b5b..c862c55687 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -760,7 +760,6 @@ if(BUILD_UNIT_TESTS) src/armnnTfParser/test/Convolution2d.cpp src/armnnTfParser/test/Concat.cpp src/armnnTfParser/test/ConcatOfConcats.cpp - src/armnnTfLiteParser/test/DepthToSpace.cpp src/armnnTfParser/test/DepthwiseConvolution2d.cpp src/armnnTfParser/test/Equal.cpp src/armnnTfParser/test/ExpandDims.cpp @@ -808,6 +807,7 @@ if(BUILD_UNIT_TESTS) src/armnnTfLiteParser/test/Constant.cpp src/armnnTfLiteParser/test/Conv2D.cpp src/armnnTfLiteParser/test/DepthwiseConvolution2D.cpp + src/armnnTfLiteParser/test/DepthToSpace.cpp src/armnnTfLiteParser/test/Dequantize.cpp src/armnnTfLiteParser/test/DetectionPostProcess.cpp src/armnnTfLiteParser/test/Div.cpp @@ -825,6 +825,7 @@ if(BUILD_UNIT_TESTS) src/armnnTfLiteParser/test/Neg.cpp 
src/armnnTfLiteParser/test/Pack.cpp src/armnnTfLiteParser/test/Pad.cpp + src/armnnTfLiteParser/test/Reduce.cpp src/armnnTfLiteParser/test/Reshape.cpp src/armnnTfLiteParser/test/ResizeBilinear.cpp src/armnnTfLiteParser/test/ResizeNearestNeighbor.cpp diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt index 777702e0b4..74390c8a93 100644 --- a/delegate/CMakeLists.txt +++ b/delegate/CMakeLists.txt @@ -37,6 +37,7 @@ list(APPEND armnnDelegate_sources src/Pooling.hpp src/Quantization.hpp src/Redefine.hpp + src/Reduce.hpp src/Resize.hpp src/Round.hpp src/Slice.hpp @@ -143,6 +144,8 @@ if(BUILD_UNIT_TESTS) src/test/QuantizationTest.cpp src/test/QuantizationTestHelper.hpp src/test/RedefineTestHelper.hpp + src/test/ReduceTest.cpp + src/test/ReduceTestHelper.hpp src/test/ReshapeTest.cpp src/test/ResizeTest.cpp src/test/ResizeTestHelper.hpp diff --git a/delegate/src/Reduce.hpp b/delegate/src/Reduce.hpp new file mode 100644 index 0000000000..13a11d3e61 --- /dev/null +++ b/delegate/src/Reduce.hpp @@ -0,0 +1,133 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include + +namespace armnnDelegate +{ + +TfLiteStatus VisitReduceOperator(DelegateData& delegateData, + TfLiteContext* tfLiteContext, + TfLiteNode* tfLiteNode, + int nodeIndex, + int32_t reduceOperatorCode) +{ + TF_LITE_ENSURE_STATUS(ValidateNumInputs(tfLiteContext, tfLiteNode, 2, nodeIndex)); + TF_LITE_ENSURE_STATUS(ValidateNumOutputs(tfLiteContext, tfLiteNode, 1, nodeIndex)); + + const TfLiteTensor* tfLiteTensors = tfLiteContext->tensors; + const TfLiteTensor& tfLiteInputTensor = tfLiteTensors[tfLiteNode->inputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteInputTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const TfLiteTensor& tfLiteOutputTensor = tfLiteTensors[tfLiteNode->outputs->data[0]]; + if (!IsValid(tfLiteContext, tfLiteOutputTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& inputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteInputTensor); + const armnn::TensorInfo& outputTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteOutputTensor); + + // Get const axis value from model and set it to descriptor. + const TfLiteTensor& tfLiteAxisTensor = tfLiteTensors[tfLiteNode->inputs->data[1]]; + if (!IsValid(tfLiteContext, tfLiteAxisTensor, reduceOperatorCode, nodeIndex)) + { + return kTfLiteError; + } + + const armnn::TensorInfo& axisTensorInfo = GetTensorInfoForTfLiteTensor(tfLiteAxisTensor); + auto* axisTensorData = tflite::GetTensorData(&tfLiteAxisTensor); + + std::vector axis; + // Add axis data to vector to be converted to unsigned int and assigned to descriptor axis. 
+ if (axisTensorData != nullptr) + { + for (unsigned int i = 0; i < axisTensorInfo.GetNumElements(); ++i) + { + axis.emplace_back(axisTensorData[i]); + } + } + else + { + for (unsigned int i = 0; i < inputTensorInfo.GetNumDimensions(); ++i) + { + axis.push_back(i); + } + } + + // Convert the axis to unsigned int and remove duplicates. + unsigned int rank = inputTensorInfo.GetNumDimensions(); + std::set uniqueAxis; + std::transform(axis.begin(), + axis.end(), + std::inserter(uniqueAxis, uniqueAxis.begin()), + [rank](int i)->unsigned int{ return (i + rank) % rank; }); + + armnn::ReduceDescriptor desc; + desc.m_vAxis.assign(uniqueAxis.begin(), uniqueAxis.end()); + + auto* reducerParameters = reinterpret_cast(tfLiteNode->builtin_data); + desc.m_KeepDims = reducerParameters->keep_dims; + if (reduceOperatorCode == kTfLiteBuiltinReduceMax) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Max; + } + else if (reduceOperatorCode == kTfLiteBuiltinReduceMin) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Min; + } + else if (reduceOperatorCode == kTfLiteBuiltinSum) + { + desc.m_ReduceOperation = armnn::ReduceOperation::Sum; + } + else + { + TF_LITE_MAYBE_KERNEL_LOG( + tfLiteContext, + "TfLiteArmnnDelegate: Unsupported Reduction Operator #%d node #%d: ", + reduceOperatorCode, nodeIndex); + return kTfLiteError; + } + + bool isSupported = false; + auto validateFunc = [&](const armnn::TensorInfo& outInfo, bool& isSupported) + { + FORWARD_LAYER_SUPPORT_FUNC(__func__, + tfLiteContext, + IsReduceSupported, + delegateData.m_Backends, + isSupported, + inputTensorInfo, + outInfo, + desc); + }; + + if (!delegateData.m_Network) + { + validateFunc(outputTensorInfo, isSupported); + return isSupported ? 
kTfLiteOk : kTfLiteError; + } + + // Add a Reduce layer + armnn::IConnectableLayer* layer = delegateData.m_Network->AddReduceLayer(desc); + ARMNN_ASSERT(layer != nullptr); + + armnn::IOutputSlot& outputSlot = layer->GetOutputSlot(0); + outputSlot.SetTensorInfo(outputTensorInfo); + + // Connect + return Connect(layer, tfLiteNode, delegateData); +} + +} // namespace armnnDelegate diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp index 3ebc0cc6b5..2b07fc7098 100644 --- a/delegate/src/armnn_delegate.cpp +++ b/delegate/src/armnn_delegate.cpp @@ -25,6 +25,7 @@ #include "Pooling.hpp" #include "Quantization.hpp" #include "Redefine.hpp" +#include "Reduce.hpp" #include "Resize.hpp" #include "Round.hpp" #include "Slice.hpp" @@ -733,6 +734,18 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData, tfLiteNode, nodeIndex, kTfLiteBuiltinRank); + case kTfLiteBuiltinReduceMax: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReduceMax); + case kTfLiteBuiltinReduceMin: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinReduceMin); case kTfLiteBuiltinRelu: return VisitActivationOperator(delegateData, tfLiteContext, @@ -805,6 +818,12 @@ TfLiteStatus ArmnnSubgraph::VisitNode(DelegateData& delegateData, tfLiteNode, nodeIndex, kTfLiteBuiltinStridedSlice); + case kTfLiteBuiltinSum: + return VisitReduceOperator(delegateData, + tfLiteContext, + tfLiteNode, + nodeIndex, + kTfLiteBuiltinSum); case kTfLiteBuiltinTranspose: return VisitTransposeOperator(delegateData, tfLiteContext, diff --git a/delegate/src/test/ReduceTest.cpp b/delegate/src/test/ReduceTest.cpp new file mode 100644 index 0000000000..49608b6a2c --- /dev/null +++ b/delegate/src/test/ReduceTest.cpp @@ -0,0 +1,354 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ReduceTestHelper.hpp" + +#include + +#include +#include + +#include + +namespace armnnDelegate +{ + +void ReduceUint8KeepDimsTest(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 1, 3 }; + + std::vector input0Values { 1, 2, 3, + 4, 3, 1 }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void ReduceUint8Test(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 3 }; + + std::vector input0Values { 1, 2, 3, + 4, 3, 1 }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_UINT8, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +void ReduceFp32KeepDimsTest(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 1, 3 }; + + std::vector input0Values { 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + true); +} + +void ReduceFp32Test(tflite::BuiltinOperator reduceOperatorCode, + std::vector& backends, + std::vector& 
expectedOutputValues) +{ + std::vector input0Shape { 1, 1, 2, 3 }; + std::vector input1Shape { 1 }; + std::vector expectedOutputShape { 1, 1, 3 }; + + std::vector input0Values { 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f }; // Inputs + std::vector input1Values { 2 }; // Axis + + ReduceTest(reduceOperatorCode, + ::tflite::TensorType_FLOAT32, + backends, + input0Shape, + input1Shape, + expectedOutputShape, + input0Values, + input1Values, + expectedOutputValues, + false); +} + +// REDUCE_MAX Tests +TEST_SUITE("ReduceMax_CpuRefTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +} // End of ReduceMax_CpuRefTests + +TEST_SUITE("ReduceMax_CpuAccTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_CpuAcc_Test") +{ + std::vector backends = 
{armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + + +TEST_CASE ("ReduceMax_Fp32_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +} // End of ReduceMax_CpuAccTests + +TEST_SUITE("ReduceMax_GpuAccTests") +{ + +TEST_CASE ("ReduceMax_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Uint8_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 4, 3, 3 }; + ReduceUint8Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + + +TEST_CASE ("ReduceMax_Fp32_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32KeepDimsTest(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +TEST_CASE ("ReduceMax_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1001.0f, 1002.0f, 1003.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MAX, + backends, + expectedOutputValues); +} + +} // End of ReduceMax_GpuAccTests + +// REDUCE_MIN Tests +TEST_SUITE("ReduceMin_CpuRefTests") +{ + +TEST_CASE ("ReduceMin_Fp32_CpuRef_Test") +{ + std::vector backends = 
{armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_CpuRefTests + +TEST_SUITE("ReduceMin_CpuAccTests") +{ + +TEST_CASE ("ReduceMin_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_CpuAccTests + +TEST_SUITE("ReduceMin_GpuAccTests") +{ + +TEST_CASE ("ReduceMin_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 10.0f, 11.0f, 12.0f }; + ReduceFp32Test(tflite::BuiltinOperator_REDUCE_MIN, + backends, + expectedOutputValues); +} + +} // End of ReduceMin_GpuAccTests + +// SUM Tests +TEST_SUITE("Sum_CpuRefTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_CpuRef_Test") +{ + std::vector backends = {armnn::Compute::CpuRef}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of Sum_CpuRefTests + +TEST_SUITE("Sum_CpuAccTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_CpuAcc_Test") +{ + std::vector backends = {armnn::Compute::CpuAcc}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of 
Sum_CpuAccTests + +TEST_SUITE("Sum_GpuAccTests") +{ + +TEST_CASE ("Sum_Uint8_KeepDims_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 5, 5, 4 }; + ReduceUint8KeepDimsTest(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +TEST_CASE ("Sum_Fp32_GpuAcc_Test") +{ + std::vector backends = {armnn::Compute::GpuAcc}; + std::vector expectedOutputValues { 1011.0f, 1013.0f, 1015.0f }; + ReduceFp32Test(tflite::BuiltinOperator_SUM, + backends, + expectedOutputValues); +} + +} // End of Sum_GpuAccTests + + +} // namespace armnnDelegate \ No newline at end of file diff --git a/delegate/src/test/ReduceTestHelper.hpp b/delegate/src/test/ReduceTestHelper.hpp new file mode 100644 index 0000000000..b41fcfa39b --- /dev/null +++ b/delegate/src/test/ReduceTestHelper.hpp @@ -0,0 +1,186 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TestUtils.hpp" + +#include + +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace +{ + +std::vector CreateReduceTfLiteModel(tflite::BuiltinOperator reduceOperatorCode, + tflite::TensorType tensorType, + std::vector& input0TensorShape, + std::vector& input1TensorShape, + const std::vector & outputTensorShape, + std::vector& axisData, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + flatbuffers::FlatBufferBuilder flatBufferBuilder; + + std::array, 2> buffers; + buffers[0] = CreateBuffer(flatBufferBuilder, flatBufferBuilder.CreateVector({})); + buffers[1] = CreateBuffer(flatBufferBuilder, + flatBufferBuilder.CreateVector(reinterpret_cast(axisData.data()), + sizeof(int32_t) * axisData.size())); + + auto quantizationParameters = + CreateQuantizationParameters(flatBufferBuilder, + 0, + 0, + flatBufferBuilder.CreateVector({ quantScale }), + flatBufferBuilder.CreateVector({ quantOffset })); + + 
std::array, 3> tensors; + tensors[0] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input0TensorShape.data(), + input0TensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("input"), + quantizationParameters); + + tensors[1] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(input1TensorShape.data(), + input1TensorShape.size()), + ::tflite::TensorType_INT32, + 1, + flatBufferBuilder.CreateString("axis"), + quantizationParameters); + + // Create output tensor + tensors[2] = CreateTensor(flatBufferBuilder, + flatBufferBuilder.CreateVector(outputTensorShape.data(), + outputTensorShape.size()), + tensorType, + 0, + flatBufferBuilder.CreateString("output"), + quantizationParameters); + + // Create operator. Reduce operations MIN, MAX, SUM, MEAN use ReducerOptions. + tflite::BuiltinOptions operatorBuiltinOptionsType = tflite::BuiltinOptions_ReducerOptions; + flatbuffers::Offset operatorBuiltinOptions = CreateReducerOptions(flatBufferBuilder, keepDims).Union(); + + const std::vector operatorInputs{ {0, 1} }; + const std::vector operatorOutputs{ 2 }; + flatbuffers::Offset reduceOperator = + CreateOperator(flatBufferBuilder, + 0, + flatBufferBuilder.CreateVector(operatorInputs.data(), operatorInputs.size()), + flatBufferBuilder.CreateVector(operatorOutputs.data(), operatorOutputs.size()), + operatorBuiltinOptionsType, + operatorBuiltinOptions); + + const std::vector subgraphInputs{ {0, 1} }; + const std::vector subgraphOutputs{ 2 }; + flatbuffers::Offset subgraph = + CreateSubGraph(flatBufferBuilder, + flatBufferBuilder.CreateVector(tensors.data(), tensors.size()), + flatBufferBuilder.CreateVector(subgraphInputs.data(), subgraphInputs.size()), + flatBufferBuilder.CreateVector(subgraphOutputs.data(), subgraphOutputs.size()), + flatBufferBuilder.CreateVector(&reduceOperator, 1)); + + flatbuffers::Offset modelDescription = + flatBufferBuilder.CreateString("ArmnnDelegate: Reduce Operator Model"); + flatbuffers::Offset 
operatorCode = CreateOperatorCode(flatBufferBuilder, reduceOperatorCode); + + flatbuffers::Offset flatbufferModel = + CreateModel(flatBufferBuilder, + TFLITE_SCHEMA_VERSION, + flatBufferBuilder.CreateVector(&operatorCode, 1), + flatBufferBuilder.CreateVector(&subgraph, 1), + modelDescription, + flatBufferBuilder.CreateVector(buffers.data(), buffers.size())); + + flatBufferBuilder.Finish(flatbufferModel); + + return std::vector(flatBufferBuilder.GetBufferPointer(), + flatBufferBuilder.GetBufferPointer() + flatBufferBuilder.GetSize()); +} + +template +void ReduceTest(tflite::BuiltinOperator reduceOperatorCode, + tflite::TensorType tensorType, + std::vector& backends, + std::vector& input0Shape, + std::vector& input1Shape, + std::vector& expectedOutputShape, + std::vector& input0Values, + std::vector& input1Values, + std::vector& expectedOutputValues, + const bool keepDims, + float quantScale = 1.0f, + int quantOffset = 0) +{ + using namespace tflite; + std::vector modelBuffer = CreateReduceTfLiteModel(reduceOperatorCode, + tensorType, + input0Shape, + input1Shape, + expectedOutputShape, + input1Values, + keepDims, + quantScale, + quantOffset); + + const Model* tfLiteModel = GetModel(modelBuffer.data()); + + // Create TfLite Interpreters + std::unique_ptr armnnDelegateInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&armnnDelegateInterpreter) == kTfLiteOk); + CHECK(armnnDelegateInterpreter != nullptr); + CHECK(armnnDelegateInterpreter->AllocateTensors() == kTfLiteOk); + + std::unique_ptr tfLiteInterpreter; + CHECK(InterpreterBuilder(tfLiteModel, ::tflite::ops::builtin::BuiltinOpResolver()) + (&tfLiteInterpreter) == kTfLiteOk); + CHECK(tfLiteInterpreter != nullptr); + CHECK(tfLiteInterpreter->AllocateTensors() == kTfLiteOk); + + // Create the ArmNN Delegate + armnnDelegate::DelegateOptions delegateOptions(backends); + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + 
armnnDelegate::TfLiteArmnnDelegateDelete); + CHECK(theArmnnDelegate != nullptr); + + // Modify armnnDelegateInterpreter to use armnnDelegate + CHECK(armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()) == kTfLiteOk); + + // Set input data + armnnDelegate::FillInput(tfLiteInterpreter, 0, input0Values); + armnnDelegate::FillInput(armnnDelegateInterpreter, 0, input0Values); + + // Run EnqueueWorkload + CHECK(tfLiteInterpreter->Invoke() == kTfLiteOk); + CHECK(armnnDelegateInterpreter->Invoke() == kTfLiteOk); + + // Compare output data + armnnDelegate::CompareOutputData(tfLiteInterpreter, + armnnDelegateInterpreter, + expectedOutputShape, + expectedOutputValues); + + armnnDelegateInterpreter.reset(nullptr); +} + +} // anonymous namespace \ No newline at end of file diff --git a/include/armnn/BackendHelper.hpp b/include/armnn/BackendHelper.hpp index 3d0632da5e..a562f60c23 100644 --- a/include/armnn/BackendHelper.hpp +++ b/include/armnn/BackendHelper.hpp @@ -325,6 +325,11 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()); + bool IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()); + bool IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp index fb74877049..1467366323 100644 --- a/src/armnn/BackendHelper.cpp +++ b/src/armnn/BackendHelper.cpp @@ -568,6 +568,14 @@ bool LayerSupportHandle::IsRankSupported(const TensorInfo& input, return m_LayerSupport->IsRankSupported(input, output, reasonIfUnsupported.value()); } +bool LayerSupportHandle::IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported) +{ + return m_LayerSupport->IsReduceSupported(input, output, descriptor, reasonIfUnsupported.value()); +} + bool 
LayerSupportHandle::IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index 1b9157618e..8ce1667557 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -631,6 +631,8 @@ TfLiteParserImpl::TfLiteParserImpl(const Optionaldata.data()[i])); } } + else + { + for (uint32_t i = 0; i < inputTensorInfo0.GetNumDimensions(); ++i) + { + desc.m_vAxis.push_back(i); + } + } desc.m_TargetHeight = input0Shape[1]; desc.m_TargetWidth = input0Shape[2]; desc.m_KeepDims = options->keep_dims; - desc.m_ReduceOperation = armnn::ReduceOperation::Sum; + desc.m_ReduceOperation = reduceOperation; // Register a new layer object, Sum. IConnectableLayer *layer = m_Network->AddReduceLayer(desc, layerName.c_str()); diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp index 2603d9018a..b59571e7c3 100644 --- a/src/armnnTfLiteParser/TfLiteParser.hpp +++ b/src/armnnTfLiteParser/TfLiteParser.hpp @@ -124,6 +124,9 @@ private: void ParsePad(size_t subgraphIndex, size_t operatorIndex); void ParsePool(size_t subgraphIndex, size_t operatorIndex, armnn::PoolingAlgorithm algorithm); void ParseQuantize(size_t subgraphIndex, size_t operatorIndex); + void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation); + void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex); + void ParseReduceMin(size_t subgraphIndex, size_t operatorIndex); void ParseRelu(size_t subgraphIndex, size_t operatorIndex); void ParseRelu6(size_t subgraphIndex, size_t operatorIndex); void ParseReshape(size_t subgraphIndex, size_t operatorIndex); diff --git a/src/armnnTfLiteParser/test/Reduce.cpp b/src/armnnTfLiteParser/test/Reduce.cpp new file mode 100644 index 0000000000..622d54e8b5 --- /dev/null +++ b/src/armnnTfLiteParser/test/Reduce.cpp @@ -0,0 +1,193 @@ +// +// 
Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include +#include "ParserFlatbuffersFixture.hpp" +#include "../TfLiteParser.hpp" + +#include +#include + +BOOST_AUTO_TEST_SUITE(TensorflowLiteParser) + +struct ReduceMaxFixture : public ParserFlatbuffersFixture +{ + explicit ReduceMaxFixture(const std::string& inputShape, + const std::string& outputShape, + const std::string& axisShape, + const std::string& axisData) + { + m_JsonString = R"( + { + "version": 3, + "operator_codes": [ { "builtin_code": "REDUCE_MAX" } ], + "subgraphs": [ { + "tensors": [ + { + "shape": )" + inputShape + R"(, + "type": "FLOAT32", + "buffer": 0, + "name": "inputTensor", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + }, + { + "shape": )" + outputShape + R"( , + "type": "FLOAT32", + "buffer": 1, + "name": "outputTensor", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + }, + { + "shape": )" + axisShape + R"( , + "type": "INT32", + "buffer": 2, + "name": "axis", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + } + ], + "inputs": [ 0 ], + "outputs": [ 1 ], + "operators": [ + { + "opcode_index": 0, + "inputs": [ 0 , 2 ], + "outputs": [ 1 ], + "builtin_options_type": "ReducerOptions", + "builtin_options": { + "keep_dims": true, + }, + "custom_options_format": "FLEXBUFFERS" + } + ], + } ], + "buffers" : [ + { }, + { }, + { "data": )" + axisData + R"(, }, + ] + } + )"; + SetupSingleInputSingleOutput("inputTensor", "outputTensor"); + } +}; + +struct SimpleReduceMaxFixture : public ReduceMaxFixture +{ + SimpleReduceMaxFixture() : ReduceMaxFixture("[ 1, 1, 2, 3 ]", "[ 1, 1, 1, 3 ]", "[ 1 ]", "[ 2 ]") {} +}; + +BOOST_FIXTURE_TEST_CASE(ParseReduceMax, SimpleReduceMaxFixture) +{ + RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32> + (0, {{ "inputTensor", { 
1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f } } }, + {{ "outputTensor", { 1001.0f, 1002.0f, 1003.0f } } }); +} + +struct ReduceMinFixture : public ParserFlatbuffersFixture +{ + explicit ReduceMinFixture(const std::string& inputShape, + const std::string& outputShape, + const std::string& axisShape, + const std::string& axisData) + { + m_JsonString = R"( + { + "version": 3, + "operator_codes": [ { "builtin_code": "REDUCE_MIN" } ], + "subgraphs": [ { + "tensors": [ + { + "shape": )" + inputShape + R"(, + "type": "FLOAT32", + "buffer": 0, + "name": "inputTensor", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + }, + { + "shape": )" + outputShape + R"( , + "type": "FLOAT32", + "buffer": 1, + "name": "outputTensor", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + }, + { + "shape": )" + axisShape + R"( , + "type": "INT32", + "buffer": 2, + "name": "axis", + "quantization": { + "min": [ 0.0 ], + "max": [ 255.0 ], + "scale": [ 1.0 ], + "zero_point": [ 0 ], + } + } + ], + "inputs": [ 0 ], + "outputs": [ 1 ], + "operators": [ + { + "opcode_index": 0, + "inputs": [ 0 , 2 ], + "outputs": [ 1 ], + "builtin_options_type": "ReducerOptions", + "builtin_options": { + "keep_dims": true, + }, + "custom_options_format": "FLEXBUFFERS" + } + ], + } ], + "buffers" : [ + { }, + { }, + { "data": )" + axisData + R"(, }, + ] + } + )"; + SetupSingleInputSingleOutput("inputTensor", "outputTensor"); + } +}; + +struct SimpleReduceMinFixture : public ReduceMinFixture +{ + SimpleReduceMinFixture() : ReduceMinFixture("[ 1, 1, 2, 3 ]", "[ 1, 1, 1, 3 ]", "[ 1 ]", "[ 2 ]") {} +}; + +BOOST_FIXTURE_TEST_CASE(ParseReduceMin, SimpleReduceMinFixture) +{ + RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32> + (0, {{ "inputTensor", { 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f } } }, + {{ "outputTensor", { 10.0f, 11.0f, 12.0f } } }); +} + +BOOST_AUTO_TEST_SUITE_END() diff 
--git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp index 2a0707872e..d9efab288f 100644 --- a/src/backends/aclCommon/ArmComputeUtils.hpp +++ b/src/backends/aclCommon/ArmComputeUtils.hpp @@ -255,4 +255,16 @@ inline unsigned int ComputePositiveAxis(const int& axis, const armnn::TensorInfo return static_cast(positiveAxis); } +inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor& descriptor) +{ + switch (descriptor.m_ReduceOperation) + { + case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM; + case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM; + case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX; + case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN; + default: throw InvalidArgumentException("Unsupported Reduction operation"); + } +} + } // namespace armnn diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 3b6299daf3..54c791677f 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -75,6 +75,7 @@ COMMON_TEST_SOURCES := \ test/layerTests/PadTestImpl.cpp \ test/layerTests/Pooling2dTestImpl.cpp \ test/layerTests/RankTestImpl.cpp \ + test/layerTests/ReductionTestImpl.cpp \ test/layerTests/ReduceSumTestImpl.cpp \ test/layerTests/ReshapeTestImpl.cpp \ test/layerTests/ResizeTestImpl.cpp \ diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index b20ef2dd25..f92e0745d3 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -137,6 +137,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources layerTests/QuantizeTestImpl.hpp layerTests/RankTestImpl.cpp layerTests/RankTestImpl.hpp + layerTests/ReductionTestImpl.cpp + layerTests/ReductionTestImpl.hpp layerTests/ReduceSumTestImpl.cpp 
layerTests/ReduceSumTestImpl.hpp layerTests/ReshapeTestImpl.cpp diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index d87a3b08ab..a7dcb9988f 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -48,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp index 4edbd1108a..18821b9549 100644 --- a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp @@ -24,7 +24,8 @@ LayerTestResult ReduceTestCommon( const std::vector& inputData, const std::vector& outputData, const std::vector vAxis, - const armnn::ReduceOperation reduceOperation) + const armnn::ReduceOperation reduceOperation, + bool keepDims = false) { IgnoreUnused(memoryManager); auto inputTensor = MakeTensor(inputTensorInfo, ConvertToDataType(inputData, inputTensorInfo)); @@ -53,6 +54,7 @@ LayerTestResult ReduceTestCommon( descriptor.m_Parameters.m_vAxis = updated_idx; descriptor.m_Parameters.m_ReduceOperation = reduceOperation; + descriptor.m_Parameters.m_KeepDims = keepDims; armnn::WorkloadInfo info; AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); @@ -268,7 +270,8 @@ LayerTestResult ReduceSumSingleAxisTest3( inputValues, outputValues, { 3 }, - armnn::ReduceOperation::Sum); + armnn::ReduceOperation::Sum, + true); } template diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp new file mode 100644 index 0000000000..589cc03cbc --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp @@ -0,0 +1,315 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ReductionTestImpl.hpp" + +#include +#include +#include + +#include + +#include + +namespace +{ + +template> +LayerTestResult ReductionTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory, + const armnn::TensorInfo inputTensorInfo, + const armnn::TensorInfo outputTensorInfo, + const std::vector& inputData, + const std::vector& outputData, + const std::vector vAxis, + const armnn::ReduceOperation reduceOperation, + bool keepDims = false) +{ + IgnoreUnused(memoryManager); + auto inputTensor = MakeTensor(inputTensorInfo, ConvertToDataType(inputData, inputTensorInfo)); + + LayerTestResult result(outputTensorInfo); + result.outputExpected = MakeTensor(outputTensorInfo, outputData); + + std::unique_ptr inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ReduceQueueDescriptor descriptor; + std::vector updated_idx; + uint32_t resolvedAxis = 0; + for (uint32_t i = 0; i < vAxis.size(); ++i) + { + if (vAxis[i] < 0) + { + resolvedAxis = inputTensorInfo.GetNumDimensions() + static_cast(vAxis[i]); + } else + { + resolvedAxis = static_cast(vAxis[i]); + } + + updated_idx.push_back(resolvedAxis); + } + + descriptor.m_Parameters.m_vAxis = updated_idx; + descriptor.m_Parameters.m_ReduceOperation = reduceOperation; + descriptor.m_Parameters.m_KeepDims = keepDims; + armnn::WorkloadInfo info; + + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr workload = workloadFactory.CreateReduce(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin()); + + workload->Execute(); + + 
CopyDataFromITensorHandle(result.output.origin(), outputHandle.get()); + + return result; +} + +} // namespace + +template +LayerTestResult ReduceMaxSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 1, 3}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector outputValues + ({ + 1001.0f, 1002.0f, 1003.0f + }); + + return ReductionTestCommon(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 2 }, + armnn::ReduceOperation::Max); +} + +template +LayerTestResult ReduceMaxNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector outputValues + ({ + 1003.0f, 1002.0f + }); + + return ReductionTestCommon(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 
-1 }, + armnn::ReduceOperation::Max, + true); +} + +template +LayerTestResult ReduceMaxSimpleTest2( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1 }; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector inputValues + ({ + 1.0f, 3.0f, 2.0f, + 6.0f, 4.0f, 5.0f + }); + + std::vector outputValues + ({ + 3.0f, 6.0f + }); + + return ReductionTestCommon(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 3 }, + armnn::ReduceOperation::Max, + true); +} + +template +LayerTestResult ReduceMinSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape { 1, 1, 2, 3 }; + const armnn::TensorShape outputShape { 1, 1, 1, 3}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector outputValues + ({ + 10.0f, 11.0f, 12.0f + }); + + return ReductionTestCommon(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 2 }, + armnn::ReduceOperation::Min); +} + +template +LayerTestResult 
ReduceMinNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector outputValues + ({ + 11.0f, 10.0f + }); + + return ReductionTestCommon(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { -1 }, + armnn::ReduceOperation::Min, + true); +} + +// Explicit template specializations +template LayerTestResult +ReduceMaxSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult +ReduceMaxNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult +ReduceMaxSimpleTest2( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult +ReduceMinSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult +ReduceMinNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const 
armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp new file mode 100644 index 0000000000..495a74b64f --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "LayerTestResult.hpp" + +#include + +#include +#include + +template> +LayerTestResult ReduceMaxSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template> +LayerTestResult ReduceMaxNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template> +LayerTestResult ReduceMaxSimpleTest2( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template> +LayerTestResult ReduceMinSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template> +LayerTestResult ReduceMinNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index 65454d4fc4..0ab79309a0 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -60,6 +60,7 @@ #include 
"workloads/ClQLstmWorkload.hpp" #include "workloads/ClQuantizedLstmWorkload.hpp" #include "workloads/ClQuantizeWorkload.hpp" +#include "workloads/ClReduceWorkload.hpp" #include "workloads/ClReshapeWorkload.hpp" #include "workloads/ClResizeWorkload.hpp" #include "workloads/ClRsqrtWorkload.hpp" @@ -798,6 +799,18 @@ bool ClLayerSupport::IsQuantizeSupported(const TensorInfo& input, output); } +bool ClLayerSupport::IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClReduceWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool ClLayerSupport::IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index f2df94c8e2..8b873915dd 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -253,6 +253,11 @@ public: const TensorInfo& output, Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index d65b26314e..ee6bcd3bc3 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -575,6 +575,12 @@ std::unique_ptr ClWorkloadFactory::CreateRank(const RankQueueDescript return std::make_unique(descriptor, info); } +std::unique_ptr ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + std::unique_ptr 
ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 66aea8498f..1d2c572103 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -206,6 +206,9 @@ public: std::unique_ptr CreateRank(const RankQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + std::unique_ptr CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 9514750563..9a83257272 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -66,6 +66,7 @@ BACKEND_SOURCES := \ workloads/ClQLstmWorkload.cpp \ workloads/ClQuantizedLstmWorkload.cpp \ workloads/ClQuantizeWorkload.cpp \ + workloads/ClReduceWorkload.cpp \ workloads/ClReshapeWorkload.cpp \ workloads/ClResizeWorkload.cpp \ workloads/ClRsqrtWorkload.cpp \ diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 018a62df95..013965c445 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -1271,6 +1271,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test) +// ReduceSum +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3) + +// ReduceMax 
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest) + #if defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index 7427ea018d..3a1b6b8432 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -87,6 +87,8 @@ list(APPEND armnnClBackendWorkloads_sources ClQuantizeWorkload.cpp ClQuantizeWorkload.hpp ClRankWorkload.hpp + ClReduceWorkload.cpp + ClReduceWorkload.hpp ClReshapeWorkload.cpp ClReshapeWorkload.hpp ClResizeWorkload.cpp diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp new file mode 100644 index 0000000000..6f594ff7a9 --- /dev/null +++ b/src/backends/cl/workloads/ClReduceWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClReduceWorkload.hpp" + +#include +#include +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "ClReduceWorkload: Reduction is supported only on 1 axis."); + } + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), + input.GetNumDimensions(), + desc.m_vAxis); + + + return arm_compute::CLReductionOperation::validate(&aclInputInfo, + &aclOutputInfo, + static_cast(coords[0]), + ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); +} + +ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), + info.m_InputTensorInfos[0].GetNumDimensions(), + m_Data.m_Parameters.m_vAxis); + m_Layer.configure(&input, + &output, + static_cast(coords[0]), + ConvertReductionOperationToAcl(m_Data.m_Parameters), + m_Data.m_Parameters.m_KeepDims); +} + +void ClReduceWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp 
b/src/backends/cl/workloads/ClReduceWorkload.hpp new file mode 100644 index 0000000000..8481eeea5a --- /dev/null +++ b/src/backends/cl/workloads/ClReduceWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc); + +class ClReduceWorkload : public BaseWorkload +{ +public: + ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReductionOperation m_Layer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index 0045e7a77f..f99a9fa11b 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -44,6 +44,7 @@ #include "ClQuantizeWorkload.hpp" #include "ClQuantizedLstmWorkload.hpp" #include "ClRankWorkload.hpp" +#include "ClReduceWorkload.hpp" #include "ClReshapeWorkload.hpp" #include "ClResizeWorkload.hpp" #include "ClRsqrtWorkload.hpp" diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 2d22576e57..66999c1a30 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -58,6 +58,7 @@ #include "workloads/NeonQLstmWorkload.hpp" #include "workloads/NeonQuantizeWorkload.hpp" #include "workloads/NeonQuantizedLstmWorkload.hpp" +#include "workloads/NeonReduceWorkload.hpp" #include "workloads/NeonReshapeWorkload.hpp" #include "workloads/NeonResizeWorkload.hpp" #include "workloads/NeonRsqrtWorkload.hpp" @@ -784,6 +785,18 @@ bool NeonLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input, paramsInfo); } +bool NeonLayerSupport::IsReduceSupported(const TensorInfo& input, + const TensorInfo& 
output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonReduceWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp index dc13cc2e4e..2ae1b0d489 100644 --- a/src/backends/neon/NeonLayerSupport.hpp +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -263,6 +263,11 @@ public: const QuantizedLstmInputParamsInfo& paramsInfo, Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index 0d36110da5..7d0942874e 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -497,6 +497,12 @@ std::unique_ptr NeonWorkloadFactory::CreateRank(const RankQueueDescri return std::make_unique(descriptor, info); } +std::unique_ptr NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + std::unique_ptr NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 745dece659..4817a06540 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -207,6 +207,9 @@ public: std::unique_ptr CreateRank(const RankQueueDescriptor& descriptor, const 
WorkloadInfo& info) const override; + std::unique_ptr CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + std::unique_ptr CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 54560cb0fa..6feeeb5f2c 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -66,6 +66,7 @@ BACKEND_SOURCES := \ workloads/NeonQLstmWorkload.cpp \ workloads/NeonQuantizedLstmWorkload.cpp \ workloads/NeonQuantizeWorkload.cpp \ + workloads/NeonReduceWorkload.cpp \ workloads/NeonReshapeWorkload.cpp \ workloads/NeonResizeWorkload.cpp \ workloads/NeonRsqrtWorkload.cpp \ diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp index d351870645..8434a67082 100644 --- a/src/backends/neon/test/NeonLayerTests.cpp +++ b/src/backends/neon/test/NeonLayerTests.cpp @@ -1372,6 +1372,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test) +// ReduceSum +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3) + +// ReduceMax +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest) + #if 
defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index f1a723b324..7c2b185ec3 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -93,6 +93,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonQuantizeWorkload.cpp NeonQuantizeWorkload.hpp NeonRankWorkload.hpp + NeonReduceWorkload.cpp + NeonReduceWorkload.hpp NeonReshapeWorkload.cpp NeonReshapeWorkload.hpp NeonResizeWorkload.cpp diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp new file mode 100644 index 0000000000..0e1b46a3a1 --- /dev/null +++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReduceWorkload.hpp" + +#include +#include + +#include + +#include "NeonWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "NeonReduceWorkload: Reduction is supported only on 1 axis."); + } + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), + input.GetNumDimensions(), + desc.m_vAxis); + + return arm_compute::NEReductionOperation::validate(&aclInputInfo, + &aclOutputInfo, + static_cast(coords[0]), + 
ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); +} + +NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), + info.m_InputTensorInfos[0].GetNumDimensions(), + m_Data.m_Parameters.m_vAxis); + m_Layer.configure(&input, + &output, + static_cast(coords[0]), + ConvertReductionOperationToAcl(m_Data.m_Parameters), + m_Data.m_Parameters.m_KeepDims); +} + +void NeonReduceWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp new file mode 100644 index 0000000000..0472091fbf --- /dev/null +++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +#include + +namespace armnn +{ + +arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc); + +class NeonReduceWorkload : public BaseWorkload +{ +public: + NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::NEReductionOperation m_Layer; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 949100d50a..4eb526a04d 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -49,6 +49,7 @@ #include "NeonQuantizedLstmWorkload.hpp" #include "NeonQuantizeWorkload.hpp" #include "NeonRankWorkload.hpp" +#include "NeonReduceWorkload.hpp" #include "NeonReshapeWorkload.hpp" #include "NeonResizeWorkload.hpp" #include "NeonRsqrtWorkload.hpp" diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index d5e0f8290b..161476ed98 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -2241,4 +2241,13 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisT ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3) ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest) +// ReduceMax +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest) + BOOST_AUTO_TEST_SUITE_END() 
diff --git a/src/backends/reference/workloads/Reduce.cpp b/src/backends/reference/workloads/Reduce.cpp index 5375c7163a..31c6262c9a 100644 --- a/src/backends/reference/workloads/Reduce.cpp +++ b/src/backends/reference/workloads/Reduce.cpp @@ -75,33 +75,27 @@ void Reduce(const TensorInfo& inputInfo, const std::vector axis, const ReduceOperation reduceOperation) { - unsigned int inputNumDims = inputInfo.GetNumDimensions(); - unsigned int outputNumDims = outputInfo.GetNumDimensions(); - - armnn::TensorShape outputDims = outputInfo.GetShape(); armnn::TensorShape inputDims = inputInfo.GetShape(); + unsigned int inputNumDims = inputInfo.GetNumDimensions(); + unsigned int numOutputs = outputInfo.GetNumElements(); - // Initialise output data. - unsigned int numOutputs = 1; - for (unsigned int idx = 0; idx < outputNumDims; ++idx) + // Initialise temp output /* tempOut holds one accumulator per output element: seeded from the input for Max/Min, zero-filled for every other operation. */ + std::vector tempOut(numOutputs); + if (reduceOperation == ReduceOperation::Max || reduceOperation == ReduceOperation::Min) { - numOutputs *= outputDims[idx]; + for (unsigned int idx = 0; idx < numOutputs; ++idx) + { + input[idx]; + tempOut[idx] = input.Get(); /* NOTE(review): this seeds accumulator idx with input element idx, i.e. the first numOutputs input values. Element idx only belongs to the slice reduced into output idx for some axis choices; e.g. shape [2,3] reduced over axis 1 seeds output 1 from row 0, which looks like it can yield a wrong Max/Min - TODO confirm, or seed with the lowest/highest representable value instead. */ + } } - - std::vector tempSum(numOutputs); - for (unsigned int idx = 0; idx < numOutputs; ++idx) + else { - output[idx]; - output.Set(0.0f); - tempSum[idx] = 0.0f; + std::fill(tempOut.begin(), tempOut.end(), 0.0); } - // Initialise temp index. - std::vector tempIndex(inputNumDims); - for (unsigned int idx = 0; idx < inputNumDims; ++idx) - { - tempIndex[idx] = 0; - } + // Initialise temp index + std::vector tempIndex(inputNumDims, 0); std::vector resolvedAxis = axis; if (resolvedAxis.empty()) @@ -113,17 +107,35 @@ void Reduce(const TensorInfo& inputInfo, } auto numResolvedAxis = armnn::numeric_cast(resolvedAxis.size()); - // Iterates through input_data and sum up the reduced axis. 
+ // Iterates through input_data and operates over the reduced axis for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex)) { unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {}); unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, numResolvedAxis, resolvedAxis); input[inputOffset]; - tempSum[outputOffset] += input.Get(); + auto inputValue = input.Get(); /* Max keeps the larger value, Min keeps the smaller one, and every other operation adds the value to the running total in tempOut. */ + if (reduceOperation == ReduceOperation::Max) + { + if (inputValue > tempOut[outputOffset]) + { + tempOut[outputOffset] = inputValue; + } + } + else if (reduceOperation == ReduceOperation::Min) + { + if (inputValue < tempOut[outputOffset]) + { + tempOut[outputOffset] = inputValue; + } + } + else + { + tempOut[outputOffset] += inputValue; + } } - // Takes average by num of elements added to get mean. + // Takes average by num of elements added to get MEAN size_t numElementsInAxis = 1; for (unsigned int idx = 0; idx < numResolvedAxis; ++idx) { @@ -132,19 +144,21 @@ void Reduce(const TensorInfo& inputInfo, (std::numeric_limits::max() / armnn::numeric_cast(numElementsInAxis))); numElementsInAxis *= current; } - if (numElementsInAxis > 0) { - for (unsigned int idx = 0; idx < numOutputs; ++idx) + + for (unsigned int idx = 0; idx < numOutputs; ++idx) + { + output[idx]; + if (reduceOperation == ReduceOperation::Mean) { - output[idx]; - if (reduceOperation == ReduceOperation::Sum) - { - output.Set(tempSum[idx]); - } - else if (reduceOperation == ReduceOperation::Mean) + if (numElementsInAxis > 0) /* NOTE(review): when this is false nothing is written to output[idx] for Mean; the removed code set every output element to 0.0f up front, this version does not - confirm whether a zero-sized axis can reach here. */ { - output.Set(tempSum[idx] / armnn::numeric_cast(numElementsInAxis)); + output.Set(tempOut[idx] / armnn::numeric_cast(numElementsInAxis)); } } + else + { + output.Set(tempOut[idx]); + } } } -- cgit v1.2.1