aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorSadik Armagan <sadik.armagan@arm.com>2021-02-09 10:28:54 +0000
committerSadik Armagan <sadik.armagan@arm.com>2021-02-09 10:31:14 +0000
commita2747487fbe7eb6d9f5357c6d16c32355ed6e01c (patch)
tree6f6f8b38100d16f1ec8a0e5be71e8e6ae1cc600a /src
parentac001eebca101f2df4973d2f1d8cfca026e07419 (diff)
downloadarmnn-a2747487fbe7eb6d9f5357c6d16c32355ed6e01c.tar.gz
MLCE-347 'REDUCE_MIN, REDUCE_MAX, REDUCE_SUM Support'
* Added TfLiteParser support for REDUCE_MIN and REDUCE_MAX operators
* Added ACL workloads support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators
* Added TfLite Delegate support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I8085d59946bfd4ab78a59a61f899031ae53371a8
Diffstat (limited to 'src')
-rw-r--r--src/armnn/BackendHelper.cpp8
-rw-r--r--src/armnnTfLiteParser/TfLiteParser.cpp28
-rw-r--r--src/armnnTfLiteParser/TfLiteParser.hpp3
-rw-r--r--src/armnnTfLiteParser/test/Reduce.cpp193
-rw-r--r--src/backends/aclCommon/ArmComputeUtils.hpp12
-rw-r--r--src/backends/backendsCommon/common.mk1
-rw-r--r--src/backends/backendsCommon/test/CMakeLists.txt2
-rw-r--r--src/backends/backendsCommon/test/LayerTests.hpp1
-rw-r--r--src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp7
-rw-r--r--src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp315
-rw-r--r--src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp43
-rw-r--r--src/backends/cl/ClLayerSupport.cpp13
-rw-r--r--src/backends/cl/ClLayerSupport.hpp5
-rw-r--r--src/backends/cl/ClWorkloadFactory.cpp6
-rw-r--r--src/backends/cl/ClWorkloadFactory.hpp3
-rw-r--r--src/backends/cl/backend.mk1
-rw-r--r--src/backends/cl/test/ClLayerTests.cpp15
-rw-r--r--src/backends/cl/workloads/CMakeLists.txt2
-rw-r--r--src/backends/cl/workloads/ClReduceWorkload.cpp66
-rw-r--r--src/backends/cl/workloads/ClReduceWorkload.hpp30
-rw-r--r--src/backends/cl/workloads/ClWorkloads.hpp1
-rw-r--r--src/backends/neon/NeonLayerSupport.cpp13
-rw-r--r--src/backends/neon/NeonLayerSupport.hpp5
-rw-r--r--src/backends/neon/NeonWorkloadFactory.cpp6
-rw-r--r--src/backends/neon/NeonWorkloadFactory.hpp3
-rw-r--r--src/backends/neon/backend.mk1
-rw-r--r--src/backends/neon/test/NeonLayerTests.cpp15
-rw-r--r--src/backends/neon/workloads/CMakeLists.txt2
-rw-r--r--src/backends/neon/workloads/NeonReduceWorkload.cpp66
-rw-r--r--src/backends/neon/workloads/NeonReduceWorkload.hpp30
-rw-r--r--src/backends/neon/workloads/NeonWorkloads.hpp1
-rw-r--r--src/backends/reference/test/RefLayerTests.cpp9
-rw-r--r--src/backends/reference/workloads/Reduce.cpp78
33 files changed, 948 insertions, 36 deletions
diff --git a/src/armnn/BackendHelper.cpp b/src/armnn/BackendHelper.cpp
index fb74877049..1467366323 100644
--- a/src/armnn/BackendHelper.cpp
+++ b/src/armnn/BackendHelper.cpp
@@ -568,6 +568,14 @@ bool LayerSupportHandle::IsRankSupported(const TensorInfo& input,
return m_LayerSupport->IsRankSupported(input, output, reasonIfUnsupported.value());
}
+/// Asks the layer-support object held in m_LayerSupport whether a Reduce layer with the
+/// given input/output tensor infos and descriptor is supported, and returns its answer.
+/// NOTE(review): reasonIfUnsupported.value() is read unconditionally, so callers presumably
+/// have to supply a reason string - confirm against the Optional contract.
+bool LayerSupportHandle::IsReduceSupported(const TensorInfo& input,
+                                           const TensorInfo& output,
+                                           const ReduceDescriptor& descriptor,
+                                           Optional<std::string&> reasonIfUnsupported)
+{
+    auto& reason = reasonIfUnsupported.value();
+    return m_LayerSupport->IsReduceSupported(input, output, descriptor, reason);
+}
+
bool LayerSupportHandle::IsReshapeSupported(const TensorInfo& input,
const TensorInfo& output,
const ReshapeDescriptor& descriptor,
diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp
index 1b9157618e..8ce1667557 100644
--- a/src/armnnTfLiteParser/TfLiteParser.cpp
+++ b/src/armnnTfLiteParser/TfLiteParser.cpp
@@ -631,6 +631,8 @@ TfLiteParserImpl::TfLiteParserImpl(const Optional<ITfLiteParser::TfLiteParserOpt
m_ParserFunctions[tflite::BuiltinOperator_QUANTIZE] = &TfLiteParserImpl::ParseQuantize;
m_ParserFunctions[tflite::BuiltinOperator_RELU] = &TfLiteParserImpl::ParseRelu;
m_ParserFunctions[tflite::BuiltinOperator_RELU6] = &TfLiteParserImpl::ParseRelu6;
+ m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MAX] = &TfLiteParserImpl::ParseReduceMax;
+ m_ParserFunctions[tflite::BuiltinOperator_REDUCE_MIN] = &TfLiteParserImpl::ParseReduceMin;
m_ParserFunctions[tflite::BuiltinOperator_RESHAPE] = &TfLiteParserImpl::ParseReshape;
m_ParserFunctions[tflite::BuiltinOperator_RESIZE_BILINEAR] = &TfLiteParserImpl::ParseResizeBilinear;
m_ParserFunctions[tflite::BuiltinOperator_RESIZE_NEAREST_NEIGHBOR] = &TfLiteParserImpl::ParseResizeNearestNeighbor;
@@ -3059,6 +3061,21 @@ void TfLiteParserImpl::ParseDepthToSpace(size_t subgraphIndex, size_t operatorIn
void TfLiteParserImpl::ParseSum(size_t subgraphIndex, size_t operatorIndex)
{
+ ParseReduce(subgraphIndex, operatorIndex, armnn::ReduceOperation::Sum);
+}
+
+/// Parser entry point registered for REDUCE_MAX: delegates to ParseReduce with the
+/// Max reduce operation.
+void TfLiteParserImpl::ParseReduceMax(size_t subgraphIndex, size_t operatorIndex)
+{
+    const armnn::ReduceOperation operation = armnn::ReduceOperation::Max;
+    ParseReduce(subgraphIndex, operatorIndex, operation);
+}
+
+/// Parser entry point registered for REDUCE_MIN: delegates to ParseReduce with the
+/// Min reduce operation.
+void TfLiteParserImpl::ParseReduceMin(size_t subgraphIndex, size_t operatorIndex)
+{
+    const armnn::ReduceOperation operation = armnn::ReduceOperation::Min;
+    ParseReduce(subgraphIndex, operatorIndex, operation);
+}
+
+void TfLiteParserImpl::ParseReduce(size_t subgraphIndex, size_t operatorIndex, ReduceOperation reduceOperation)
+{
CHECK_MODEL(m_Model, subgraphIndex, operatorIndex);
const auto &operatorPtr = m_Model->subgraphs[subgraphIndex]->operators[operatorIndex];
@@ -3070,7 +3087,7 @@ void TfLiteParserImpl::ParseSum(size_t subgraphIndex, size_t operatorIndex)
auto outputs = GetOutputs(m_Model, subgraphIndex, operatorIndex);
CHECK_VALID_SIZE(outputs.size(), 1);
- auto layerName = fmt::format("Sum:{}:{}", subgraphIndex, operatorIndex);
+ auto layerName = fmt::format("Reduce:{}:{}", subgraphIndex, operatorIndex);
armnn::TensorInfo inputTensorInfo0 = ToTensorInfo(inputs[0]);
armnn::TensorInfo inputTensorInfo1 = ToTensorInfo(inputs[1]);
@@ -3088,11 +3105,18 @@ void TfLiteParserImpl::ParseSum(size_t subgraphIndex, size_t operatorIndex)
axisBufferPtr->data.data()[i]));
}
}
+ else
+ {
+ for (uint32_t i = 0; i < inputTensorInfo0.GetNumDimensions(); ++i)
+ {
+ desc.m_vAxis.push_back(i);
+ }
+ }
desc.m_TargetHeight = input0Shape[1];
desc.m_TargetWidth = input0Shape[2];
desc.m_KeepDims = options->keep_dims;
- desc.m_ReduceOperation = armnn::ReduceOperation::Sum;
+ desc.m_ReduceOperation = reduceOperation;
// Register a new layer object, Sum.
IConnectableLayer *layer = m_Network->AddReduceLayer(desc, layerName.c_str());
diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp
index 2603d9018a..b59571e7c3 100644
--- a/src/armnnTfLiteParser/TfLiteParser.hpp
+++ b/src/armnnTfLiteParser/TfLiteParser.hpp
@@ -124,6 +124,9 @@ private:
void ParsePad(size_t subgraphIndex, size_t operatorIndex);
void ParsePool(size_t subgraphIndex, size_t operatorIndex, armnn::PoolingAlgorithm algorithm);
void ParseQuantize(size_t subgraphIndex, size_t operatorIndex);
+ void ParseReduce(size_t subgraphIndex, size_t operatorIndex, armnn::ReduceOperation reduceOperation);
+ void ParseReduceMax(size_t subgraphIndex, size_t operatorIndex);
+ void ParseReduceMin(size_t subgraphIndex, size_t operatorIndex);
void ParseRelu(size_t subgraphIndex, size_t operatorIndex);
void ParseRelu6(size_t subgraphIndex, size_t operatorIndex);
void ParseReshape(size_t subgraphIndex, size_t operatorIndex);
diff --git a/src/armnnTfLiteParser/test/Reduce.cpp b/src/armnnTfLiteParser/test/Reduce.cpp
new file mode 100644
index 0000000000..622d54e8b5
--- /dev/null
+++ b/src/armnnTfLiteParser/test/Reduce.cpp
@@ -0,0 +1,193 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include <boost/test/unit_test.hpp>
+#include "ParserFlatbuffersFixture.hpp"
+#include "../TfLiteParser.hpp"
+
+#include <string>
+#include <iostream>
+
+BOOST_AUTO_TEST_SUITE(TensorflowLiteParser)
+
+/// Shared fixture for the single-operator reduction models parsed in this file.
+///
+/// Builds a TfLite JSON model with a FLOAT32 input tensor, a FLOAT32 output tensor and an
+/// INT32 axis tensor whose contents come from buffer 2. The operator has keep_dims set to
+/// true and is selected by builtinCode (e.g. "REDUCE_MAX" or "REDUCE_MIN").
+///
+/// The shape/data arguments are spliced into the JSON verbatim, so they must already be
+/// JSON array literals such as "[ 1, 1, 2, 3 ]".
+struct ReduceMinMaxFixtureBase : public ParserFlatbuffersFixture
+{
+    explicit ReduceMinMaxFixtureBase(const std::string& builtinCode,
+                                     const std::string& inputShape,
+                                     const std::string& outputShape,
+                                     const std::string& axisShape,
+                                     const std::string& axisData)
+    {
+        m_JsonString = R"(
+            {
+                "version": 3,
+                "operator_codes": [ { "builtin_code": ")" + builtinCode + R"(" } ],
+                "subgraphs": [ {
+                    "tensors": [
+                        {
+                            "shape": )" + inputShape + R"(,
+                            "type": "FLOAT32",
+                            "buffer": 0,
+                            "name": "inputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + outputShape + R"( ,
+                            "type": "FLOAT32",
+                            "buffer": 1,
+                            "name": "outputTensor",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        },
+                        {
+                            "shape": )" + axisShape + R"( ,
+                            "type": "INT32",
+                            "buffer": 2,
+                            "name": "axis",
+                            "quantization": {
+                                "min": [ 0.0 ],
+                                "max": [ 255.0 ],
+                                "scale": [ 1.0 ],
+                                "zero_point": [ 0 ],
+                            }
+                        }
+                    ],
+                    "inputs": [ 0 ],
+                    "outputs": [ 1 ],
+                    "operators": [
+                        {
+                            "opcode_index": 0,
+                            "inputs": [ 0 , 2 ],
+                            "outputs": [ 1 ],
+                            "builtin_options_type": "ReducerOptions",
+                            "builtin_options": {
+                                "keep_dims": true,
+                            },
+                            "custom_options_format": "FLEXBUFFERS"
+                        }
+                    ],
+                } ],
+                "buffers" : [
+                    { },
+                    { },
+                    { "data": )" + axisData + R"(, },
+                ]
+            }
+        )";
+        SetupSingleInputSingleOutput("inputTensor", "outputTensor");
+    }
+};
+
+/// REDUCE_MAX model fixture; see ReduceMinMaxFixtureBase for the model layout.
+struct ReduceMaxFixture : public ReduceMinMaxFixtureBase
+{
+    explicit ReduceMaxFixture(const std::string& inputShape,
+                              const std::string& outputShape,
+                              const std::string& axisShape,
+                              const std::string& axisData)
+        : ReduceMinMaxFixtureBase("REDUCE_MAX", inputShape, outputShape, axisShape, axisData)
+    {}
+};
+
+/// Reduces a [1, 1, 2, 3] input over axis 2, producing a [1, 1, 1, 3] output.
+struct SimpleReduceMaxFixture : public ReduceMaxFixture
+{
+    SimpleReduceMaxFixture() : ReduceMaxFixture("[ 1, 1, 2, 3 ]", "[ 1, 1, 1, 3 ]", "[ 1 ]", "[ 2 ]") {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseReduceMax, SimpleReduceMaxFixture)
+{
+    // Expected output is the element-wise maximum of the two input rows.
+    RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32>
+        (0, {{ "inputTensor",  { 1001.0f, 11.0f,   1003.0f,
+                                 10.0f,   1002.0f, 12.0f } } },
+            {{ "outputTensor", { 1001.0f, 1002.0f, 1003.0f } } });
+}
+
+/// REDUCE_MIN model fixture; see ReduceMinMaxFixtureBase for the model layout.
+struct ReduceMinFixture : public ReduceMinMaxFixtureBase
+{
+    explicit ReduceMinFixture(const std::string& inputShape,
+                              const std::string& outputShape,
+                              const std::string& axisShape,
+                              const std::string& axisData)
+        : ReduceMinMaxFixtureBase("REDUCE_MIN", inputShape, outputShape, axisShape, axisData)
+    {}
+};
+
+/// Reduces a [1, 1, 2, 3] input over axis 2, producing a [1, 1, 1, 3] output.
+struct SimpleReduceMinFixture : public ReduceMinFixture
+{
+    SimpleReduceMinFixture() : ReduceMinFixture("[ 1, 1, 2, 3 ]", "[ 1, 1, 1, 3 ]", "[ 1 ]", "[ 2 ]") {}
+};
+
+BOOST_FIXTURE_TEST_CASE(ParseReduceMin, SimpleReduceMinFixture)
+{
+    // Expected output is the element-wise minimum of the two input rows.
+    RunTest<4, armnn::DataType::Float32, armnn::DataType::Float32>
+        (0, {{ "inputTensor",  { 1001.0f, 11.0f,   1003.0f,
+                                 10.0f,   1002.0f, 12.0f } } },
+            {{ "outputTensor", { 10.0f, 11.0f, 12.0f } } });
+}
+
+BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index 2a0707872e..d9efab288f 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -255,4 +255,16 @@ inline unsigned int ComputePositiveAxis(const int& axis, const armnn::TensorInfo
return static_cast<unsigned int>(positiveAxis);
}
+/// Translates the reduce operation stored in descriptor.m_ReduceOperation into the
+/// corresponding arm_compute::ReductionOperation value.
+/// Throws InvalidArgumentException for any operation other than Sum, Mean, Max or Min.
+inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor& descriptor)
+{
+    const auto operation = descriptor.m_ReduceOperation;
+
+    if (operation == ReduceOperation::Sum)
+    {
+        return arm_compute::ReductionOperation::SUM;
+    }
+    if (operation == ReduceOperation::Mean)
+    {
+        return arm_compute::ReductionOperation::MEAN_SUM;
+    }
+    if (operation == ReduceOperation::Max)
+    {
+        return arm_compute::ReductionOperation::MAX;
+    }
+    if (operation == ReduceOperation::Min)
+    {
+        return arm_compute::ReductionOperation::MIN;
+    }
+
+    throw InvalidArgumentException("Unsupported Reduction operation");
+}
+
} // namespace armnn
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 3b6299daf3..54c791677f 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -75,6 +75,7 @@ COMMON_TEST_SOURCES := \
test/layerTests/PadTestImpl.cpp \
test/layerTests/Pooling2dTestImpl.cpp \
test/layerTests/RankTestImpl.cpp \
+ test/layerTests/ReductionTestImpl.cpp \
test/layerTests/ReduceSumTestImpl.cpp \
test/layerTests/ReshapeTestImpl.cpp \
test/layerTests/ResizeTestImpl.cpp \
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index b20ef2dd25..f92e0745d3 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -137,6 +137,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources
layerTests/QuantizeTestImpl.hpp
layerTests/RankTestImpl.cpp
layerTests/RankTestImpl.hpp
+ layerTests/ReductionTestImpl.cpp
+ layerTests/ReductionTestImpl.hpp
layerTests/ReduceSumTestImpl.cpp
layerTests/ReduceSumTestImpl.hpp
layerTests/ReshapeTestImpl.cpp
diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp
index d87a3b08ab..a7dcb9988f 100644
--- a/src/backends/backendsCommon/test/LayerTests.hpp
+++ b/src/backends/backendsCommon/test/LayerTests.hpp
@@ -48,6 +48,7 @@
#include <backendsCommon/test/layerTests/PreluTestImpl.hpp>
#include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp>
#include <backendsCommon/test/layerTests/RankTestImpl.hpp>
+#include <backendsCommon/test/layerTests/ReductionTestImpl.hpp>
#include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp>
#include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp>
#include <backendsCommon/test/layerTests/ResizeTestImpl.hpp>
diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
index 4edbd1108a..18821b9549 100644
--- a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
+++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp
@@ -24,7 +24,8 @@ LayerTestResult<float, 4> ReduceTestCommon(
const std::vector<float>& inputData,
const std::vector<float>& outputData,
const std::vector<int32_t> vAxis,
- const armnn::ReduceOperation reduceOperation)
+ const armnn::ReduceOperation reduceOperation,
+ bool keepDims = false)
{
IgnoreUnused(memoryManager);
auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
@@ -53,6 +54,7 @@ LayerTestResult<float, 4> ReduceTestCommon(
descriptor.m_Parameters.m_vAxis = updated_idx;
descriptor.m_Parameters.m_ReduceOperation = reduceOperation;
+ descriptor.m_Parameters.m_KeepDims = keepDims;
armnn::WorkloadInfo info;
AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
@@ -268,7 +270,8 @@ LayerTestResult<float, 4> ReduceSumSingleAxisTest3(
inputValues,
outputValues,
{ 3 },
- armnn::ReduceOperation::Sum);
+ armnn::ReduceOperation::Sum,
+ true);
}
template<armnn::DataType ArmnnType, typename T>
diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp
new file mode 100644
index 0000000000..589cc03cbc
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp
@@ -0,0 +1,315 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ReductionTestImpl.hpp"
+
+#include <backendsCommon/test/DataTypeUtils.hpp>
+#include <backendsCommon/test/TensorCopyUtils.hpp>
+#include <backendsCommon/test/WorkloadTestUtils.hpp>
+
+#include <test/TensorHelpers.hpp>
+
+#include <iostream>
+
+namespace
+{
+
+/// Creates a Reduce workload through workloadFactory, runs it once on inputData and returns
+/// a LayerTestResult holding the produced output next to the expected outputData.
+///
+/// Negative entries in vAxis are resolved against the number of input dimensions before
+/// being stored in the descriptor. memoryManager is accepted but not used.
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReductionTestCommon(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory,
+    const armnn::TensorInfo inputTensorInfo,
+    const armnn::TensorInfo outputTensorInfo,
+    const std::vector<float>& inputData,
+    const std::vector<float>& outputData,
+    const std::vector<int32_t> vAxis,
+    const armnn::ReduceOperation reduceOperation,
+    bool keepDims = false)
+{
+    IgnoreUnused(memoryManager);
+
+    // Input values are converted to the tensor's data type; expectations stay in float.
+    auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo));
+
+    LayerTestResult<float, 4> result(outputTensorInfo);
+    result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData);
+
+    std::unique_ptr<armnn::ITensorHandle> inputHandle  = tensorHandleFactory.CreateTensorHandle(inputTensorInfo);
+    std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo);
+
+    // Turn every (possibly negative) axis into a non-negative index.
+    const uint32_t rank = inputTensorInfo.GetNumDimensions();
+    std::vector<uint32_t> resolvedAxes;
+    for (const int32_t axis : vAxis)
+    {
+        const uint32_t axisAsUnsigned = static_cast<uint32_t>(axis);
+        resolvedAxes.push_back(axis < 0 ? rank + axisAsUnsigned : axisAsUnsigned);
+    }
+
+    armnn::ReduceQueueDescriptor descriptor;
+    descriptor.m_Parameters.m_vAxis           = resolvedAxes;
+    descriptor.m_Parameters.m_ReduceOperation = reduceOperation;
+    descriptor.m_Parameters.m_KeepDims        = keepDims;
+
+    armnn::WorkloadInfo info;
+    AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get());
+    AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get());
+
+    std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduce(descriptor, info);
+
+    inputHandle->Allocate();
+    outputHandle->Allocate();
+
+    CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin());
+    workload->Execute();
+    CopyDataFromITensorHandle(result.output.origin(), outputHandle.get());
+
+    return result;
+}
+
+} // namespace
+
+/// Max-reduces a { 1, 1, 2, 3 } input along axis 2 (keepDims left at its default) and
+/// expects the element-wise maximum of the two rows in a { 1, 1, 1, 3 } output.
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceMaxSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape shapeIn{ 1, 1, 2, 3 };
+    const armnn::TensorShape shapeOut{ 1, 1, 1, 3 };
+
+    armnn::TensorInfo inputTensorInfo(shapeIn, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(shapeOut, armnn::DataType::Float32);
+
+    // Quantized inputs use scale 1 and offset 0.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    const std::vector<float> inputValues =
+    {
+        1001.0f, 11.0f,   1003.0f,
+        10.0f,   1002.0f, 12.0f
+    };
+    const std::vector<float> expectedValues = { 1001.0f, 1002.0f, 1003.0f };
+
+    return ReductionTestCommon<ArmnnType>(workloadFactory,
+                                          memoryManager,
+                                          tensorHandleFactory,
+                                          inputTensorInfo,
+                                          outputTensorInfo,
+                                          inputValues,
+                                          expectedValues,
+                                          { 2 },
+                                          armnn::ReduceOperation::Max);
+}
+
+/// Max-reduces a { 1, 1, 2, 3 } input along axis -1 with keepDims enabled and expects the
+/// per-row maximum in a { 1, 1, 2, 1 } output.
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceMaxNegativeAxisTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape shapeIn{ 1, 1, 2, 3 };
+    const armnn::TensorShape shapeOut{ 1, 1, 2, 1 };
+
+    armnn::TensorInfo inputTensorInfo(shapeIn, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(shapeOut, armnn::DataType::Float32);
+
+    // Quantized inputs use scale 1 and offset 0.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    const std::vector<float> inputValues =
+    {
+        1001.0f, 11.0f,   1003.0f,
+        10.0f,   1002.0f, 12.0f
+    };
+    const std::vector<float> expectedValues = { 1003.0f, 1002.0f };
+
+    const bool keepDims = true;
+    return ReductionTestCommon<ArmnnType>(workloadFactory,
+                                          memoryManager,
+                                          tensorHandleFactory,
+                                          inputTensorInfo,
+                                          outputTensorInfo,
+                                          inputValues,
+                                          expectedValues,
+                                          { -1 },
+                                          armnn::ReduceOperation::Max,
+                                          keepDims);
+}
+
+/// Max-reduces a { 1, 1, 2, 3 } input along axis 3 with keepDims enabled and expects the
+/// per-row maximum in a { 1, 1, 2, 1 } output.
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceMaxSimpleTest2(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape shapeIn{ 1, 1, 2, 3 };
+    const armnn::TensorShape shapeOut{ 1, 1, 2, 1 };
+
+    armnn::TensorInfo inputTensorInfo(shapeIn, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(shapeOut, armnn::DataType::Float32);
+
+    // Quantized inputs use scale 1 and offset 0.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    const std::vector<float> inputValues =
+    {
+        1.0f, 3.0f, 2.0f,
+        6.0f, 4.0f, 5.0f
+    };
+    const std::vector<float> expectedValues = { 3.0f, 6.0f };
+
+    const bool keepDims = true;
+    return ReductionTestCommon<ArmnnType>(workloadFactory,
+                                          memoryManager,
+                                          tensorHandleFactory,
+                                          inputTensorInfo,
+                                          outputTensorInfo,
+                                          inputValues,
+                                          expectedValues,
+                                          { 3 },
+                                          armnn::ReduceOperation::Max,
+                                          keepDims);
+}
+
+/// Min-reduces a { 1, 1, 2, 3 } input along axis 2 (keepDims left at its default) and
+/// expects the element-wise minimum of the two rows in a { 1, 1, 1, 3 } output.
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceMinSimpleTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape shapeIn{ 1, 1, 2, 3 };
+    const armnn::TensorShape shapeOut{ 1, 1, 1, 3 };
+
+    armnn::TensorInfo inputTensorInfo(shapeIn, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(shapeOut, armnn::DataType::Float32);
+
+    // Quantized inputs use scale 1 and offset 0.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    const std::vector<float> inputValues =
+    {
+        1001.0f, 11.0f,   1003.0f,
+        10.0f,   1002.0f, 12.0f
+    };
+    const std::vector<float> expectedValues = { 10.0f, 11.0f, 12.0f };
+
+    return ReductionTestCommon<ArmnnType>(workloadFactory,
+                                          memoryManager,
+                                          tensorHandleFactory,
+                                          inputTensorInfo,
+                                          outputTensorInfo,
+                                          inputValues,
+                                          expectedValues,
+                                          { 2 },
+                                          armnn::ReduceOperation::Min);
+}
+
+/// Min-reduces a { 1, 1, 2, 3 } input along axis -1 with keepDims enabled and expects the
+/// per-row minimum in a { 1, 1, 2, 1 } output.
+template<armnn::DataType ArmnnType, typename T>
+LayerTestResult<float, 4> ReduceMinNegativeAxisTest(
+    armnn::IWorkloadFactory& workloadFactory,
+    const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+    const armnn::ITensorHandleFactory& tensorHandleFactory)
+{
+    const armnn::TensorShape shapeIn{ 1, 1, 2, 3 };
+    const armnn::TensorShape shapeOut{ 1, 1, 2, 1 };
+
+    armnn::TensorInfo inputTensorInfo(shapeIn, ArmnnType);
+    armnn::TensorInfo outputTensorInfo(shapeOut, armnn::DataType::Float32);
+
+    // Quantized inputs use scale 1 and offset 0.
+    if (armnn::IsQuantizedType<T>())
+    {
+        inputTensorInfo.SetQuantizationScale(1.0f);
+        inputTensorInfo.SetQuantizationOffset(0);
+    }
+
+    const std::vector<float> inputValues =
+    {
+        1001.0f, 11.0f,   1003.0f,
+        10.0f,   1002.0f, 12.0f
+    };
+    const std::vector<float> expectedValues = { 11.0f, 10.0f };
+
+    const bool keepDims = true;
+    return ReductionTestCommon<ArmnnType>(workloadFactory,
+                                          memoryManager,
+                                          tensorHandleFactory,
+                                          inputTensorInfo,
+                                          outputTensorInfo,
+                                          inputValues,
+                                          expectedValues,
+                                          { -1 },
+                                          armnn::ReduceOperation::Min,
+                                          keepDims);
+}
+
+// Explicit template instantiations
+template LayerTestResult<float, 4>
+ReduceMaxSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceMaxNegativeAxisTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceMaxSimpleTest2<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceMinSimpleTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template LayerTestResult<float, 4>
+ReduceMinNegativeAxisTest<armnn::DataType::Float32>(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp
new file mode 100644
index 0000000000..495a74b64f
--- /dev/null
+++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "LayerTestResult.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/backends/IBackendInternal.hpp>
+#include <backendsCommon/WorkloadFactory.hpp>
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceMaxSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceMaxNegativeAxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceMaxSimpleTest2(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceMinSimpleTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
+
+template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
+LayerTestResult<float, 4> ReduceMinNegativeAxisTest(
+ armnn::IWorkloadFactory& workloadFactory,
+ const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+ const armnn::ITensorHandleFactory& tensorHandleFactory);
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 65454d4fc4..0ab79309a0 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -60,6 +60,7 @@
#include "workloads/ClQLstmWorkload.hpp"
#include "workloads/ClQuantizedLstmWorkload.hpp"
#include "workloads/ClQuantizeWorkload.hpp"
+#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClReshapeWorkload.hpp"
#include "workloads/ClResizeWorkload.hpp"
#include "workloads/ClRsqrtWorkload.hpp"
@@ -798,6 +799,18 @@ bool ClLayerSupport::IsQuantizeSupported(const TensorInfo& input,
output);
}
+/// Reports whether the CL backend supports a Reduce layer for the given tensors and
+/// descriptor. The check is handed to ClReduceWorkloadValidate through
+/// FORWARD_WORKLOAD_VALIDATE_FUNC, which also receives reasonIfUnsupported.
+bool ClLayerSupport::IsReduceSupported(const TensorInfo& input,
+                                       const TensorInfo& output,
+                                       const ReduceDescriptor& descriptor,
+                                       Optional<std::string&> reasonIfUnsupported) const
+{
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClReduceWorkloadValidate, reasonIfUnsupported, input, output, descriptor);
+}
+
bool ClLayerSupport::IsReshapeSupported(const TensorInfo& input,
const TensorInfo& output,
const ReshapeDescriptor& descriptor,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index f2df94c8e2..8b873915dd 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -253,6 +253,11 @@ public:
const TensorInfo& output,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ bool IsReduceSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsReshapeSupported(const TensorInfo& input,
const TensorInfo& output,
const ReshapeDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index d65b26314e..ee6bcd3bc3 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -575,6 +575,12 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescript
return std::make_unique<ClRankWorkload>(descriptor, info);
}
+/// Builds a ClReduceWorkload from the given queue descriptor and workload info and hands
+/// ownership to the caller as an IWorkload.
+std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info) const
+{
+    std::unique_ptr<IWorkload> reduceWorkload = std::make_unique<ClReduceWorkload>(descriptor, info);
+    return reduceWorkload;
+}
+
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 66aea8498f..1d2c572103 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -206,6 +206,9 @@ public:
std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 9514750563..9a83257272 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -66,6 +66,7 @@ BACKEND_SOURCES := \
workloads/ClQLstmWorkload.cpp \
workloads/ClQuantizedLstmWorkload.cpp \
workloads/ClQuantizeWorkload.cpp \
+ workloads/ClReduceWorkload.cpp \
workloads/ClReshapeWorkload.cpp \
workloads/ClResizeWorkload.cpp \
workloads/ClRsqrtWorkload.cpp \
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index 018a62df95..013965c445 100644
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -1271,6 +1271,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test)
ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test)
ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test)
+// ReduceSum
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
+
+// ReduceMax
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>)
+
+// ReduceMin
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>)
+
#if defined(ARMNNREF_ENABLED)
// The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 7427ea018d..3a1b6b8432 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -87,6 +87,8 @@ list(APPEND armnnClBackendWorkloads_sources
ClQuantizeWorkload.cpp
ClQuantizeWorkload.hpp
ClRankWorkload.hpp
+ ClReduceWorkload.cpp
+ ClReduceWorkload.hpp
ClReshapeWorkload.cpp
ClReshapeWorkload.hpp
ClResizeWorkload.cpp
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
new file mode 100644
index 0000000000..6f594ff7a9
--- /dev/null
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClReduceWorkload.hpp"
+
+#include <cl/ClTensorHandle.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Reports whether arm_compute::CLReductionOperation accepts the reduction described by desc.
+// A descriptor that names more than one axis is rejected with a RUNTIME_ERROR status; in every
+// other case the status produced by CLReductionOperation::validate is returned unchanged.
+arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
+                                             const TensorInfo& output,
+                                             const ReduceDescriptor& desc)
+{
+    const auto aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    // A size above one already implies a non-empty axis list.
+    const bool hasMultipleAxes = desc.m_vAxis.size() > 1;
+    if (hasMultipleAxes)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "ClReduceWorkload: Reduction is supported only on 1 axis.");
+    }
+
+    const arm_compute::Coordinates reductionCoords =
+        BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), desc.m_vAxis);
+
+    // NOTE(review): only the first coordinate is consumed, including when m_vAxis is empty -
+    // confirm what BuildArmComputeReductionCoordinates yields for an empty axis list.
+    const auto reductionAxis = static_cast<unsigned int>(reductionCoords[0]);
+
+    return arm_compute::CLReductionOperation::validate(&aclInputInfo,
+                                                       &aclOutputInfo,
+                                                       reductionAxis,
+                                                       ConvertReductionOperationToAcl(desc),
+                                                       desc.m_KeepDims);
+}
+
+// Builds the workload: checks that exactly one input and one output are bound, then configures
+// m_Layer with the CL tensors, the reduction axis, the ACL reduction operation and keep-dims flag.
+ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1);
+
+    auto* const inputHandle  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0]);
+    auto* const outputHandle = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0]);
+    arm_compute::ICLTensor& srcTensor = inputHandle->GetTensor();
+    arm_compute::ICLTensor& dstTensor = outputHandle->GetTensor();
+
+    const auto& params = m_Data.m_Parameters;
+
+    const arm_compute::Coordinates reductionCoords =
+        BuildArmComputeReductionCoordinates(srcTensor.info()->num_dimensions(),
+                                            info.m_InputTensorInfos[0].GetNumDimensions(),
+                                            params.m_vAxis);
+
+    // Only the first coordinate is handed to ACL as the reduction axis.
+    const auto reductionAxis = static_cast<unsigned int>(reductionCoords[0]);
+
+    m_Layer.configure(&srcTensor,
+                      &dstTensor,
+                      reductionAxis,
+                      ConvertReductionOperationToAcl(params),
+                      params.m_KeepDims);
+}
+
+// Runs the reduction configured in the constructor by calling m_Layer.run(), under the
+// ARMNN_SCOPED_PROFILING_EVENT_CL event named "ClReduceWorkload_Execute".
+void ClReduceWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp b/src/backends/cl/workloads/ClReduceWorkload.hpp
new file mode 100644
index 0000000000..8481eeea5a
--- /dev/null
+++ b/src/backends/cl/workloads/ClReduceWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLReductionOperation.h>
+
+namespace armnn
+{
+
+// Validates a reduce operation for the CL backend.
+// input / output: tensor infos of the layer; desc: reduce parameters (axes, keep-dims, operation).
+// Returns an error status when desc names more than one axis, otherwise the status reported by
+// arm_compute::CLReductionOperation::validate.
+arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceDescriptor& desc);
+
+// CL backend workload that performs a reduce operation through arm_compute::CLReductionOperation.
+class ClReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
+{
+public:
+ // Configures the underlying ACL function from the queue descriptor and workload info.
+ ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ // Runs the configured ACL reduction.
+ void Execute() const override;
+
+private:
+ // mutable: Execute() is const yet invokes m_Layer.run().
+ mutable arm_compute::CLReductionOperation m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index 0045e7a77f..f99a9fa11b 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -44,6 +44,7 @@
#include "ClQuantizeWorkload.hpp"
#include "ClQuantizedLstmWorkload.hpp"
#include "ClRankWorkload.hpp"
+#include "ClReduceWorkload.hpp"
#include "ClReshapeWorkload.hpp"
#include "ClResizeWorkload.hpp"
#include "ClRsqrtWorkload.hpp"
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 2d22576e57..66999c1a30 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -58,6 +58,7 @@
#include "workloads/NeonQLstmWorkload.hpp"
#include "workloads/NeonQuantizeWorkload.hpp"
#include "workloads/NeonQuantizedLstmWorkload.hpp"
+#include "workloads/NeonReduceWorkload.hpp"
#include "workloads/NeonReshapeWorkload.hpp"
#include "workloads/NeonResizeWorkload.hpp"
#include "workloads/NeonRsqrtWorkload.hpp"
@@ -784,6 +785,18 @@ bool NeonLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input,
paramsInfo);
}
+// Neon support query for the Reduce layer. The decision is delegated to NeonReduceWorkloadValidate
+// via FORWARD_WORKLOAD_VALIDATE_FUNC, which also receives reasonIfUnsupported.
+// NOTE(review): the macro is defined outside this hunk - presumably it converts the returned
+// arm_compute::Status into the bool result and fills reasonIfUnsupported; confirm at its definition.
+bool NeonLayerSupport::IsReduceSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported) const
+{
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonReduceWorkloadValidate,
+ reasonIfUnsupported,
+ input,
+ output,
+ descriptor);
+}
+
bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
const TensorInfo& output,
const ReshapeDescriptor& descriptor,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index dc13cc2e4e..2ae1b0d489 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -263,6 +263,11 @@ public:
const QuantizedLstmInputParamsInfo& paramsInfo,
Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+ // Support query for the Reduce layer; reasonIfUnsupported is optional and defaults to EmptyOptional().
+ bool IsReduceSupported(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceDescriptor& descriptor,
+ Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
+
bool IsReshapeSupported(const TensorInfo& input,
const TensorInfo& output,
const ReshapeDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 0d36110da5..7d0942874e 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -497,6 +497,12 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescri
return std::make_unique<NeonRankWorkload>(descriptor, info);
}
+// Creates the Neon reduce workload: returns a newly allocated NeonReduceWorkload constructed from
+// the given queue descriptor and workload info.
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const
+{
+ return std::make_unique<NeonReduceWorkload>(descriptor, info);
+}
+
std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const
{
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 745dece659..4817a06540 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -207,6 +207,9 @@ public:
std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
+ // Factory entry point for Reduce layers; the returned workload is owned by the caller.
+ std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor,
+ const WorkloadInfo& info) const override;
+
std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info) const override;
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 54560cb0fa..6feeeb5f2c 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -66,6 +66,7 @@ BACKEND_SOURCES := \
workloads/NeonQLstmWorkload.cpp \
workloads/NeonQuantizedLstmWorkload.cpp \
workloads/NeonQuantizeWorkload.cpp \
+ workloads/NeonReduceWorkload.cpp \
workloads/NeonReshapeWorkload.cpp \
workloads/NeonResizeWorkload.cpp \
workloads/NeonRsqrtWorkload.cpp \
diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp
index d351870645..8434a67082 100644
--- a/src/backends/neon/test/NeonLayerTests.cpp
+++ b/src/backends/neon/test/NeonLayerTests.cpp
@@ -1372,6 +1372,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test)
ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test)
ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test)
+// ReduceSum
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
+
+// ReduceMax
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>)
+
+// ReduceMin
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>)
+
#if defined(ARMNNREF_ENABLED)
// The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index f1a723b324..7c2b185ec3 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -93,6 +93,8 @@ list(APPEND armnnNeonBackendWorkloads_sources
NeonQuantizeWorkload.cpp
NeonQuantizeWorkload.hpp
NeonRankWorkload.hpp
+ NeonReduceWorkload.cpp
+ NeonReduceWorkload.hpp
NeonReshapeWorkload.cpp
NeonReshapeWorkload.hpp
NeonResizeWorkload.cpp
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
new file mode 100644
index 0000000000..0e1b46a3a1
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonReduceWorkload.hpp"
+
+#include <aclCommon/ArmComputeUtils.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <neon/NeonTensorHandle.hpp>
+
+#include "NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+// Reports whether arm_compute::NEReductionOperation accepts the reduction described by desc.
+// A descriptor that names more than one axis is rejected with a RUNTIME_ERROR status; in every
+// other case the status produced by NEReductionOperation::validate is returned unchanged.
+arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
+                                               const TensorInfo& output,
+                                               const ReduceDescriptor& desc)
+{
+    const auto aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    // A size above one already implies a non-empty axis list.
+    const bool hasMultipleAxes = desc.m_vAxis.size() > 1;
+    if (hasMultipleAxes)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
+                                   "NeonReduceWorkload: Reduction is supported only on 1 axis.");
+    }
+
+    const arm_compute::Coordinates reductionCoords =
+        BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), desc.m_vAxis);
+
+    // NOTE(review): only the first coordinate is consumed, including when m_vAxis is empty -
+    // confirm what BuildArmComputeReductionCoordinates yields for an empty axis list.
+    const auto reductionAxis = static_cast<unsigned int>(reductionCoords[0]);
+
+    return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                       &aclOutputInfo,
+                                                       reductionAxis,
+                                                       ConvertReductionOperationToAcl(desc),
+                                                       desc.m_KeepDims);
+}
+
+// Builds the workload: checks that exactly one input and one output are bound, then configures
+// m_Layer with the ACL tensors, the reduction axis, the ACL reduction operation and keep-dims flag.
+NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1);
+
+    auto* const inputHandle  = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
+    auto* const outputHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
+    arm_compute::ITensor& srcTensor = inputHandle->GetTensor();
+    arm_compute::ITensor& dstTensor = outputHandle->GetTensor();
+
+    const auto& params = m_Data.m_Parameters;
+
+    const arm_compute::Coordinates reductionCoords =
+        BuildArmComputeReductionCoordinates(srcTensor.info()->num_dimensions(),
+                                            info.m_InputTensorInfos[0].GetNumDimensions(),
+                                            params.m_vAxis);
+
+    // Only the first coordinate is handed to ACL as the reduction axis.
+    const auto reductionAxis = static_cast<unsigned int>(reductionCoords[0]);
+
+    m_Layer.configure(&srcTensor,
+                      &dstTensor,
+                      reductionAxis,
+                      ConvertReductionOperationToAcl(params),
+                      params.m_KeepDims);
+}
+
+// Runs the reduction configured in the constructor by calling m_Layer.run(), under the
+// ARMNN_SCOPED_PROFILING_EVENT_NEON event named "NeonReduceWorkload_Execute".
+void NeonReduceWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp
new file mode 100644
index 0000000000..0472091fbf
--- /dev/null
+++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backendsCommon/Workload.hpp>
+
+#include <arm_compute/runtime/NEON/functions/NEReductionOperation.h>
+
+namespace armnn
+{
+
+// Validates a reduce operation for the Neon backend.
+// input / output: tensor infos of the layer; desc: reduce parameters (axes, keep-dims, operation).
+// Returns an error status when desc names more than one axis, otherwise the status reported by
+// arm_compute::NEReductionOperation::validate.
+arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ReduceDescriptor& desc);
+
+// Neon backend workload that performs a reduce operation through arm_compute::NEReductionOperation.
+class NeonReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
+{
+public:
+ // Configures the underlying ACL function from the queue descriptor and workload info.
+ NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ // Runs the configured ACL reduction.
+ void Execute() const override;
+
+private:
+ // mutable: Execute() is const yet invokes m_Layer.run().
+ mutable arm_compute::NEReductionOperation m_Layer;
+};
+
+} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp
index 949100d50a..4eb526a04d 100644
--- a/src/backends/neon/workloads/NeonWorkloads.hpp
+++ b/src/backends/neon/workloads/NeonWorkloads.hpp
@@ -49,6 +49,7 @@
#include "NeonQuantizedLstmWorkload.hpp"
#include "NeonQuantizeWorkload.hpp"
#include "NeonRankWorkload.hpp"
+#include "NeonReduceWorkload.hpp"
#include "NeonReshapeWorkload.hpp"
#include "NeonResizeWorkload.hpp"
#include "NeonRsqrtWorkload.hpp"
diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp
index d5e0f8290b..161476ed98 100644
--- a/src/backends/reference/test/RefLayerTests.cpp
+++ b/src/backends/reference/test/RefLayerTests.cpp
@@ -2241,4 +2241,13 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisT
ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>)
ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest<DataType::Float32>)
+// ReduceMax
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>)
+
+// ReduceMin
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>)
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>)
+
BOOST_AUTO_TEST_SUITE_END()
diff --git a/src/backends/reference/workloads/Reduce.cpp b/src/backends/reference/workloads/Reduce.cpp
index 5375c7163a..31c6262c9a 100644
--- a/src/backends/reference/workloads/Reduce.cpp
+++ b/src/backends/reference/workloads/Reduce.cpp
@@ -75,33 +75,27 @@ void Reduce(const TensorInfo& inputInfo,
const std::vector<uint32_t> axis,
const ReduceOperation reduceOperation)
{
- unsigned int inputNumDims = inputInfo.GetNumDimensions();
- unsigned int outputNumDims = outputInfo.GetNumDimensions();
-
- armnn::TensorShape outputDims = outputInfo.GetShape();
armnn::TensorShape inputDims = inputInfo.GetShape();
+ unsigned int inputNumDims = inputInfo.GetNumDimensions();
+ unsigned int numOutputs = outputInfo.GetNumElements();
- // Initialise output data.
- unsigned int numOutputs = 1;
- for (unsigned int idx = 0; idx < outputNumDims; ++idx)
+ // Initialise temp output
+ std::vector<float> tempOut(numOutputs);
+ if (reduceOperation == ReduceOperation::Max || reduceOperation == ReduceOperation::Min)
{
- numOutputs *= outputDims[idx];
+ // Seed every slot with the identity of the operation (lowest float for Max, largest for Min).
+ // Seeding slot idx with input[idx] is wrong: that element need not belong to the reduction
+ // group accumulated into slot idx (e.g. when the innermost axis is reduced), so a foreign
+ // value could win the comparison in the loop below and corrupt the result.
+ const float seed = (reduceOperation == ReduceOperation::Max) ? std::numeric_limits<float>::lowest()
+ : std::numeric_limits<float>::max();
+ std::fill(tempOut.begin(), tempOut.end(), seed);
}
-
- std::vector<float> tempSum(numOutputs);
- for (unsigned int idx = 0; idx < numOutputs; ++idx)
+ else
{
- output[idx];
- output.Set(0.0f);
- tempSum[idx] = 0.0f;
+ std::fill(tempOut.begin(), tempOut.end(), 0.0);
}
- // Initialise temp index.
- std::vector<unsigned int> tempIndex(inputNumDims);
- for (unsigned int idx = 0; idx < inputNumDims; ++idx)
- {
- tempIndex[idx] = 0;
- }
+ // Initialise temp index
+ std::vector<unsigned int> tempIndex(inputNumDims, 0);
std::vector<unsigned int> resolvedAxis = axis;
if (resolvedAxis.empty())
@@ -113,17 +107,35 @@ void Reduce(const TensorInfo& inputInfo,
}
auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size());
- // Iterates through input_data and sum up the reduced axis.
+ // Iterates through input_data and operates over the reduced axis
for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex))
{
unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {});
unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex,
numResolvedAxis, resolvedAxis);
input[inputOffset];
- tempSum[outputOffset] += input.Get();
+ auto inputValue = input.Get();
+ if (reduceOperation == ReduceOperation::Max)
+ {
+ if (inputValue > tempOut[outputOffset])
+ {
+ tempOut[outputOffset] = inputValue;
+ }
+ }
+ else if (reduceOperation == ReduceOperation::Min)
+ {
+ if (inputValue < tempOut[outputOffset])
+ {
+ tempOut[outputOffset] = inputValue;
+ }
+ }
+ else
+ {
+ tempOut[outputOffset] += inputValue;
+ }
}
- // Takes average by num of elements added to get mean.
+ // Takes average by num of elements added to get MEAN
size_t numElementsInAxis = 1;
for (unsigned int idx = 0; idx < numResolvedAxis; ++idx)
{
@@ -132,19 +144,21 @@ void Reduce(const TensorInfo& inputInfo,
(std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis)));
numElementsInAxis *= current;
}
- if (numElementsInAxis > 0) {
- for (unsigned int idx = 0; idx < numOutputs; ++idx)
+
+ for (unsigned int idx = 0; idx < numOutputs; ++idx)
+ {
+ output[idx];
+ if (reduceOperation == ReduceOperation::Mean)
{
- output[idx];
- if (reduceOperation == ReduceOperation::Sum)
- {
- output.Set(tempSum[idx]);
- }
- else if (reduceOperation == ReduceOperation::Mean)
+ if (numElementsInAxis > 0)
{
- output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis));
+ output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis));
}
}
+ else
+ {
+ output.Set(tempOut[idx]);
+ }
}
}