Commit metadata
author:    Sadik Armagan <sadik.armagan@arm.com>  2021-02-09 10:28:54 +0000
committer: Sadik Armagan <sadik.armagan@arm.com>  2021-02-09 10:31:14 +0000
commit:    a2747487fbe7eb6d9f5357c6d16c32355ed6e01c (patch)
tree:      6f6f8b38100d16f1ec8a0e5be71e8e6ae1cc600a /src/backends
parent:    ac001eebca101f2df4973d2f1d8cfca026e07419 (diff)
download:  armnn-a2747487fbe7eb6d9f5357c6d16c32355ed6e01c.tar.gz
MLCE-347 'REDUCE_MIN, REDUCE_MAX, REDUCE_SUM Support'
* Added TfLiteParser support for REDUCE_MIN and REDUCE_MAX operators
* Added ACL workloads support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators
* Added TfLite Delegate support for REDUCE_MIN, REDUCE_MAX, and REDUCE_SUM operators
Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I8085d59946bfd4ab78a59a61f899031ae53371a8
Diffstat (limited to 'src/backends')
29 files changed, 718 insertions, 34 deletions
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp index 2a0707872e..d9efab288f 100644 --- a/src/backends/aclCommon/ArmComputeUtils.hpp +++ b/src/backends/aclCommon/ArmComputeUtils.hpp @@ -255,4 +255,16 @@ inline unsigned int ComputePositiveAxis(const int& axis, const armnn::TensorInfo return static_cast<unsigned int>(positiveAxis); } +inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const ReduceDescriptor& descriptor) +{ + switch (descriptor.m_ReduceOperation) + { + case ReduceOperation::Sum: return arm_compute::ReductionOperation::SUM; + case ReduceOperation::Mean: return arm_compute::ReductionOperation::MEAN_SUM; + case ReduceOperation::Max: return arm_compute::ReductionOperation::MAX; + case ReduceOperation::Min: return arm_compute::ReductionOperation::MIN; + default: throw InvalidArgumentException("Unsupported Reduction operation"); + } +} + } // namespace armnn diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 3b6299daf3..54c791677f 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -75,6 +75,7 @@ COMMON_TEST_SOURCES := \ test/layerTests/PadTestImpl.cpp \ test/layerTests/Pooling2dTestImpl.cpp \ test/layerTests/RankTestImpl.cpp \ + test/layerTests/ReductionTestImpl.cpp \ test/layerTests/ReduceSumTestImpl.cpp \ test/layerTests/ReshapeTestImpl.cpp \ test/layerTests/ResizeTestImpl.cpp \ diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index b20ef2dd25..f92e0745d3 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -137,6 +137,8 @@ list(APPEND armnnBackendsCommonUnitTests_sources layerTests/QuantizeTestImpl.hpp layerTests/RankTestImpl.cpp layerTests/RankTestImpl.hpp + layerTests/ReductionTestImpl.cpp + layerTests/ReductionTestImpl.hpp layerTests/ReduceSumTestImpl.cpp 
layerTests/ReduceSumTestImpl.hpp layerTests/ReshapeTestImpl.cpp diff --git a/src/backends/backendsCommon/test/LayerTests.hpp b/src/backends/backendsCommon/test/LayerTests.hpp index d87a3b08ab..a7dcb9988f 100644 --- a/src/backends/backendsCommon/test/LayerTests.hpp +++ b/src/backends/backendsCommon/test/LayerTests.hpp @@ -48,6 +48,7 @@ #include <backendsCommon/test/layerTests/PreluTestImpl.hpp> #include <backendsCommon/test/layerTests/QuantizeTestImpl.hpp> #include <backendsCommon/test/layerTests/RankTestImpl.hpp> +#include <backendsCommon/test/layerTests/ReductionTestImpl.hpp> #include <backendsCommon/test/layerTests/ReduceSumTestImpl.hpp> #include <backendsCommon/test/layerTests/ReshapeTestImpl.hpp> #include <backendsCommon/test/layerTests/ResizeTestImpl.hpp> diff --git a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp index 4edbd1108a..18821b9549 100644 --- a/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp +++ b/src/backends/backendsCommon/test/layerTests/ReduceSumTestImpl.cpp @@ -24,7 +24,8 @@ LayerTestResult<float, 4> ReduceTestCommon( const std::vector<float>& inputData, const std::vector<float>& outputData, const std::vector<int32_t> vAxis, - const armnn::ReduceOperation reduceOperation) + const armnn::ReduceOperation reduceOperation, + bool keepDims = false) { IgnoreUnused(memoryManager); auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); @@ -53,6 +54,7 @@ LayerTestResult<float, 4> ReduceTestCommon( descriptor.m_Parameters.m_vAxis = updated_idx; descriptor.m_Parameters.m_ReduceOperation = reduceOperation; + descriptor.m_Parameters.m_KeepDims = keepDims; armnn::WorkloadInfo info; AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); @@ -268,7 +270,8 @@ LayerTestResult<float, 4> ReduceSumSingleAxisTest3( inputValues, outputValues, { 3 }, - armnn::ReduceOperation::Sum); + 
armnn::ReduceOperation::Sum, + true); } template<armnn::DataType ArmnnType, typename T> diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp new file mode 100644 index 0000000000..589cc03cbc --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.cpp @@ -0,0 +1,315 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ReductionTestImpl.hpp" + +#include <backendsCommon/test/DataTypeUtils.hpp> +#include <backendsCommon/test/TensorCopyUtils.hpp> +#include <backendsCommon/test/WorkloadTestUtils.hpp> + +#include <test/TensorHelpers.hpp> + +#include <iostream> + +namespace +{ + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReductionTestCommon( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory, + const armnn::TensorInfo inputTensorInfo, + const armnn::TensorInfo outputTensorInfo, + const std::vector<float>& inputData, + const std::vector<float>& outputData, + const std::vector<int32_t> vAxis, + const armnn::ReduceOperation reduceOperation, + bool keepDims = false) +{ + IgnoreUnused(memoryManager); + auto inputTensor = MakeTensor<T, 4>(inputTensorInfo, ConvertToDataType<ArmnnType>(inputData, inputTensorInfo)); + + LayerTestResult<float, 4> result(outputTensorInfo); + result.outputExpected = MakeTensor<float, 4>(outputTensorInfo, outputData); + + std::unique_ptr<armnn::ITensorHandle> inputHandle = tensorHandleFactory.CreateTensorHandle(inputTensorInfo); + std::unique_ptr<armnn::ITensorHandle> outputHandle = tensorHandleFactory.CreateTensorHandle(outputTensorInfo); + + armnn::ReduceQueueDescriptor descriptor; + std::vector<uint32_t> updated_idx; + uint32_t resolvedAxis = 0; + for (uint32_t i = 0; i < 
vAxis.size(); ++i) + { + if (vAxis[i] < 0) + { + resolvedAxis = inputTensorInfo.GetNumDimensions() + static_cast<uint32_t>(vAxis[i]); + } else + { + resolvedAxis = static_cast<uint32_t>(vAxis[i]); + } + + updated_idx.push_back(resolvedAxis); + } + + descriptor.m_Parameters.m_vAxis = updated_idx; + descriptor.m_Parameters.m_ReduceOperation = reduceOperation; + descriptor.m_Parameters.m_KeepDims = keepDims; + armnn::WorkloadInfo info; + + AddInputToWorkload(descriptor, info, inputTensorInfo, inputHandle.get()); + AddOutputToWorkload(descriptor, info, outputTensorInfo, outputHandle.get()); + + std::unique_ptr<armnn::IWorkload> workload = workloadFactory.CreateReduce(descriptor, info); + + inputHandle->Allocate(); + outputHandle->Allocate(); + + CopyDataToITensorHandle(inputHandle.get(), inputTensor.origin()); + + workload->Execute(); + + CopyDataFromITensorHandle(result.output.origin(), outputHandle.get()); + + return result; +} + +} // namespace + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<float, 4> ReduceMaxSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 1, 3}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector<float> inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector<float> outputValues + ({ + 1001.0f, 1002.0f, 1003.0f + }); + + return ReductionTestCommon<ArmnnType>(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 2 }, + armnn::ReduceOperation::Max); +} 
+ +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<float, 4> ReduceMaxNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector<float> inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector<float> outputValues + ({ + 1003.0f, 1002.0f + }); + + return ReductionTestCommon<ArmnnType>(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { -1 }, + armnn::ReduceOperation::Max, + true); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<float, 4> ReduceMaxSimpleTest2( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1 }; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector<float> inputValues + ({ + 1.0f, 3.0f, 2.0f, + 6.0f, 4.0f, 5.0f + }); + + std::vector<float> outputValues + ({ + 3.0f, 6.0f + }); + + return ReductionTestCommon<ArmnnType>(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + 
inputValues, + outputValues, + { 3 }, + armnn::ReduceOperation::Max, + true); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<float, 4> ReduceMinSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape { 1, 1, 2, 3 }; + const armnn::TensorShape outputShape { 1, 1, 1, 3}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector<float> inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector<float> outputValues + ({ + 10.0f, 11.0f, 12.0f + }); + + return ReductionTestCommon<ArmnnType>(workloadFactory, + memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { 2 }, + armnn::ReduceOperation::Min); +} + +template<armnn::DataType ArmnnType, typename T> +LayerTestResult<float, 4> ReduceMinNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory) +{ + const armnn::TensorShape inputShape{ 1, 1, 2, 3 }; + const armnn::TensorShape outputShape{ 1, 1, 2, 1}; + + armnn::TensorInfo inputTensorInfo(inputShape, ArmnnType); + + if (armnn::IsQuantizedType<T>()) + { + inputTensorInfo.SetQuantizationScale(1.0f); + inputTensorInfo.SetQuantizationOffset(0); + } + + armnn::TensorInfo outputTensorInfo(outputShape, armnn::DataType::Float32); + + std::vector<float> inputValues + ({ + 1001.0f, 11.0f, 1003.0f, + 10.0f, 1002.0f, 12.0f + }); + std::vector<float> outputValues + ({ + 11.0f, 10.0f + }); + + return ReductionTestCommon<ArmnnType>(workloadFactory, 
+ memoryManager, + tensorHandleFactory, + inputTensorInfo, + outputTensorInfo, + inputValues, + outputValues, + { -1 }, + armnn::ReduceOperation::Min, + true); +} + +// Explicit template specializations +template LayerTestResult<float, 4> +ReduceMaxSimpleTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<float, 4> +ReduceMaxNegativeAxisTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<float, 4> +ReduceMaxSimpleTest2<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<float, 4> +ReduceMinSimpleTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template LayerTestResult<float, 4> +ReduceMinNegativeAxisTest<armnn::DataType::Float32>( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + diff --git a/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp new file mode 100644 index 0000000000..495a74b64f --- /dev/null +++ b/src/backends/backendsCommon/test/layerTests/ReductionTestImpl.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "LayerTestResult.hpp" + +#include <ResolveType.hpp> + +#include <armnn/backends/IBackendInternal.hpp> +#include <backendsCommon/WorkloadFactory.hpp> + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReduceMaxSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReduceMaxNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReduceMaxSimpleTest2( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReduceMinSimpleTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); + +template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>> +LayerTestResult<float, 4> ReduceMinNegativeAxisTest( + armnn::IWorkloadFactory& workloadFactory, + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ITensorHandleFactory& tensorHandleFactory); diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index 65454d4fc4..0ab79309a0 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -60,6 +60,7 @@ #include 
"workloads/ClQLstmWorkload.hpp" #include "workloads/ClQuantizedLstmWorkload.hpp" #include "workloads/ClQuantizeWorkload.hpp" +#include "workloads/ClReduceWorkload.hpp" #include "workloads/ClReshapeWorkload.hpp" #include "workloads/ClResizeWorkload.hpp" #include "workloads/ClRsqrtWorkload.hpp" @@ -798,6 +799,18 @@ bool ClLayerSupport::IsQuantizeSupported(const TensorInfo& input, output); } +bool ClLayerSupport::IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClReduceWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool ClLayerSupport::IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index f2df94c8e2..8b873915dd 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -253,6 +253,11 @@ public: const TensorInfo& output, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index d65b26314e..ee6bcd3bc3 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -575,6 +575,12 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateRank(const RankQueueDescript return std::make_unique<ClRankWorkload>(descriptor, info); } +std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return 
std::make_unique<ClReduceWorkload>(descriptor, info); +} + std::unique_ptr<IWorkload> ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 66aea8498f..1d2c572103 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -206,6 +206,9 @@ public: std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 9514750563..9a83257272 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -66,6 +66,7 @@ BACKEND_SOURCES := \ workloads/ClQLstmWorkload.cpp \ workloads/ClQuantizedLstmWorkload.cpp \ workloads/ClQuantizeWorkload.cpp \ + workloads/ClReduceWorkload.cpp \ workloads/ClReshapeWorkload.cpp \ workloads/ClResizeWorkload.cpp \ workloads/ClRsqrtWorkload.cpp \ diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 018a62df95..013965c445 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -1271,6 +1271,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test) +// ReduceSum +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, 
ReduceSumSingleAxisTest2<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>) + +// ReduceMax +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>) + #if defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index 7427ea018d..3a1b6b8432 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -87,6 +87,8 @@ list(APPEND armnnClBackendWorkloads_sources ClQuantizeWorkload.cpp ClQuantizeWorkload.hpp ClRankWorkload.hpp + ClReduceWorkload.cpp + ClReduceWorkload.hpp ClReshapeWorkload.cpp ClReshapeWorkload.hpp ClResizeWorkload.cpp diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp new file mode 100644 index 0000000000..6f594ff7a9 --- /dev/null +++ b/src/backends/cl/workloads/ClReduceWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClReduceWorkload.hpp" + +#include <cl/ClTensorHandle.hpp> +#include <aclCommon/ArmComputeUtils.hpp> +#include <aclCommon/ArmComputeTensorUtils.hpp> + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "ClReduceWorkload: Reduction is supported only on 1 axis."); + } + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), + input.GetNumDimensions(), + desc.m_vAxis); + + + return arm_compute::CLReductionOperation::validate(&aclInputInfo, + &aclOutputInfo, + static_cast<unsigned int>(coords[0]), + ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); +} + +ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload<ReduceQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), + info.m_InputTensorInfos[0].GetNumDimensions(), + m_Data.m_Parameters.m_vAxis); + m_Layer.configure(&input, + &output, + static_cast<unsigned int>(coords[0]), + ConvertReductionOperationToAcl(m_Data.m_Parameters), + m_Data.m_Parameters.m_KeepDims); +} + +void ClReduceWorkload::Execute() 
const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp b/src/backends/cl/workloads/ClReduceWorkload.hpp new file mode 100644 index 0000000000..8481eeea5a --- /dev/null +++ b/src/backends/cl/workloads/ClReduceWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/CL/functions/CLReductionOperation.h> + +namespace armnn +{ + +arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc); + +class ClReduceWorkload : public BaseWorkload<ReduceQueueDescriptor> +{ +public: + ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReductionOperation m_Layer; +}; + +} //namespace armnn diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index 0045e7a77f..f99a9fa11b 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -44,6 +44,7 @@ #include "ClQuantizeWorkload.hpp" #include "ClQuantizedLstmWorkload.hpp" #include "ClRankWorkload.hpp" +#include "ClReduceWorkload.hpp" #include "ClReshapeWorkload.hpp" #include "ClResizeWorkload.hpp" #include "ClRsqrtWorkload.hpp" diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 2d22576e57..66999c1a30 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -58,6 +58,7 @@ #include "workloads/NeonQLstmWorkload.hpp" #include "workloads/NeonQuantizeWorkload.hpp" #include "workloads/NeonQuantizedLstmWorkload.hpp" +#include "workloads/NeonReduceWorkload.hpp" #include "workloads/NeonReshapeWorkload.hpp" 
#include "workloads/NeonResizeWorkload.hpp" #include "workloads/NeonRsqrtWorkload.hpp" @@ -784,6 +785,18 @@ bool NeonLayerSupport::IsQuantizedLstmSupported(const TensorInfo& input, paramsInfo); } +bool NeonLayerSupport::IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonReduceWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp index dc13cc2e4e..2ae1b0d489 100644 --- a/src/backends/neon/NeonLayerSupport.hpp +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -263,6 +263,11 @@ public: const QuantizedLstmInputParamsInfo& paramsInfo, Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsReduceSupported(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& descriptor, + Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override; + bool IsReshapeSupported(const TensorInfo& input, const TensorInfo& output, const ReshapeDescriptor& descriptor, diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index 0d36110da5..7d0942874e 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -497,6 +497,12 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateRank(const RankQueueDescri return std::make_unique<NeonRankWorkload>(descriptor, info); } +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<NeonReduceWorkload>(descriptor, info); +} + std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const 
ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 745dece659..4817a06540 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -207,6 +207,9 @@ public: std::unique_ptr<IWorkload> CreateRank(const RankQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + std::unique_ptr<IWorkload> CreateReduce(const ReduceQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const override; diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk index 54560cb0fa..6feeeb5f2c 100644 --- a/src/backends/neon/backend.mk +++ b/src/backends/neon/backend.mk @@ -66,6 +66,7 @@ BACKEND_SOURCES := \ workloads/NeonQLstmWorkload.cpp \ workloads/NeonQuantizedLstmWorkload.cpp \ workloads/NeonQuantizeWorkload.cpp \ + workloads/NeonReduceWorkload.cpp \ workloads/NeonReshapeWorkload.cpp \ workloads/NeonResizeWorkload.cpp \ workloads/NeonRsqrtWorkload.cpp \ diff --git a/src/backends/neon/test/NeonLayerTests.cpp b/src/backends/neon/test/NeonLayerTests.cpp index d351870645..8434a67082 100644 --- a/src/backends/neon/test/NeonLayerTests.cpp +++ b/src/backends/neon/test/NeonLayerTests.cpp @@ -1372,6 +1372,21 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast1, LogicalOrBroadcast1Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast2, LogicalOrBroadcast2Test) ARMNN_AUTO_TEST_CASE_WITH_THF(LogicalOrBroadcast3, LogicalOrBroadcast3Test) +// ReduceSum +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumFloat32, ReduceSumSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_1, ReduceSumSingleAxisTest1<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisTest2<DataType::Float32>) 
+ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>) + +// ReduceMax +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>) + #if defined(ARMNNREF_ENABLED) // The ARMNN_COMPARE_REF_AUTO_TEST_CASE and the ARMNN_COMPARE_REF_FIXTURE_TEST_CASE test units are not available diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt index f1a723b324..7c2b185ec3 100644 --- a/src/backends/neon/workloads/CMakeLists.txt +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -93,6 +93,8 @@ list(APPEND armnnNeonBackendWorkloads_sources NeonQuantizeWorkload.cpp NeonQuantizeWorkload.hpp NeonRankWorkload.hpp + NeonReduceWorkload.cpp + NeonReduceWorkload.hpp NeonReshapeWorkload.cpp NeonReshapeWorkload.hpp NeonResizeWorkload.cpp diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp new file mode 100644 index 0000000000..0e1b46a3a1 --- /dev/null +++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonReduceWorkload.hpp" + +#include <aclCommon/ArmComputeUtils.hpp> +#include <aclCommon/ArmComputeTensorUtils.hpp> + +#include <neon/NeonTensorHandle.hpp> + +#include "NeonWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "NeonReduceWorkload: Reduction is supported only on 1 axis."); + } + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), + input.GetNumDimensions(), + desc.m_vAxis); + + return arm_compute::NEReductionOperation::validate(&aclInputInfo, + &aclOutputInfo, + static_cast<unsigned int>(coords[0]), + ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); +} + +NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) + : BaseWorkload<ReduceQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), + info.m_InputTensorInfos[0].GetNumDimensions(), + m_Data.m_Parameters.m_vAxis); + m_Layer.configure(&input, + &output, + static_cast<unsigned int>(coords[0]), + ConvertReductionOperationToAcl(m_Data.m_Parameters), + m_Data.m_Parameters.m_KeepDims); +} + +void 
NeonReduceWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp new file mode 100644 index 0000000000..0472091fbf --- /dev/null +++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/NEON/functions/NEReductionOperation.h> + +namespace armnn +{ + +arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ReduceDescriptor& desc); + +class NeonReduceWorkload : public BaseWorkload<ReduceQueueDescriptor> +{ +public: + NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::NEReductionOperation m_Layer; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp index 949100d50a..4eb526a04d 100644 --- a/src/backends/neon/workloads/NeonWorkloads.hpp +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -49,6 +49,7 @@ #include "NeonQuantizedLstmWorkload.hpp" #include "NeonQuantizeWorkload.hpp" #include "NeonRankWorkload.hpp" +#include "NeonReduceWorkload.hpp" #include "NeonReshapeWorkload.hpp" #include "NeonResizeWorkload.hpp" #include "NeonRsqrtWorkload.hpp" diff --git a/src/backends/reference/test/RefLayerTests.cpp b/src/backends/reference/test/RefLayerTests.cpp index d5e0f8290b..161476ed98 100644 --- a/src/backends/reference/test/RefLayerTests.cpp +++ b/src/backends/reference/test/RefLayerTests.cpp @@ -2241,4 +2241,13 @@ ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_2, ReduceSumSingleAxisT 
ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumSingleAxisFloat32_3, ReduceSumSingleAxisTest3<DataType::Float32>) ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceSumMultipleAxisFloat32, ReduceSumMultipleAxisTest<DataType::Float32>) +// ReduceMax +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxFloat32, ReduceMaxSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMaxNegativeAxisFloat32, ReduceMaxNegativeAxisTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMax2Float32, ReduceMaxSimpleTest2<DataType::Float32>) + +// ReduceMin +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinFloat32, ReduceMinSimpleTest<DataType::Float32>) +ARMNN_AUTO_TEST_CASE_WITH_THF(ReduceMinNegativeAxisFloat32, ReduceMinNegativeAxisTest<DataType::Float32>) + BOOST_AUTO_TEST_SUITE_END() diff --git a/src/backends/reference/workloads/Reduce.cpp b/src/backends/reference/workloads/Reduce.cpp index 5375c7163a..31c6262c9a 100644 --- a/src/backends/reference/workloads/Reduce.cpp +++ b/src/backends/reference/workloads/Reduce.cpp @@ -75,33 +75,27 @@ void Reduce(const TensorInfo& inputInfo, const std::vector<uint32_t> axis, const ReduceOperation reduceOperation) { - unsigned int inputNumDims = inputInfo.GetNumDimensions(); - unsigned int outputNumDims = outputInfo.GetNumDimensions(); - - armnn::TensorShape outputDims = outputInfo.GetShape(); armnn::TensorShape inputDims = inputInfo.GetShape(); + unsigned int inputNumDims = inputInfo.GetNumDimensions(); + unsigned int numOutputs = outputInfo.GetNumElements(); - // Initialise output data. 
- unsigned int numOutputs = 1; - for (unsigned int idx = 0; idx < outputNumDims; ++idx) + // Initialise temp output + std::vector<float> tempOut(numOutputs); + if (reduceOperation == ReduceOperation::Max || reduceOperation == ReduceOperation::Min) { - numOutputs *= outputDims[idx]; + for (unsigned int idx = 0; idx < numOutputs; ++idx) + { + input[idx]; + tempOut[idx] = input.Get(); + } } - - std::vector<float> tempSum(numOutputs); - for (unsigned int idx = 0; idx < numOutputs; ++idx) + else { - output[idx]; - output.Set(0.0f); - tempSum[idx] = 0.0f; + std::fill(tempOut.begin(), tempOut.end(), 0.0); } - // Initialise temp index. - std::vector<unsigned int> tempIndex(inputNumDims); - for (unsigned int idx = 0; idx < inputNumDims; ++idx) - { - tempIndex[idx] = 0; - } + // Initialise temp index + std::vector<unsigned int> tempIndex(inputNumDims, 0); std::vector<unsigned int> resolvedAxis = axis; if (resolvedAxis.empty()) @@ -113,17 +107,35 @@ void Reduce(const TensorInfo& inputInfo, } auto numResolvedAxis = armnn::numeric_cast<unsigned int>(resolvedAxis.size()); - // Iterates through input_data and sum up the reduced axis. 
+ // Iterates through input_data and operates over the reduced axis for (bool hasNext = true; hasNext; hasNext = NextIndex(inputNumDims, inputDims, tempIndex)) { unsigned int inputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, 0, {}); unsigned int outputOffset = ReducedOutputOffset(inputNumDims, inputDims, tempIndex, numResolvedAxis, resolvedAxis); input[inputOffset]; - tempSum[outputOffset] += input.Get(); + auto inputValue = input.Get(); + if (reduceOperation == ReduceOperation::Max) + { + if (inputValue > tempOut[outputOffset]) + { + tempOut[outputOffset] = inputValue; + } + } + else if (reduceOperation == ReduceOperation::Min) + { + if (inputValue < tempOut[outputOffset]) + { + tempOut[outputOffset] = inputValue; + } + } + else + { + tempOut[outputOffset] += inputValue; + } } - // Takes average by num of elements added to get mean. + // Takes average by num of elements added to get MEAN size_t numElementsInAxis = 1; for (unsigned int idx = 0; idx < numResolvedAxis; ++idx) { @@ -132,19 +144,21 @@ void Reduce(const TensorInfo& inputInfo, (std::numeric_limits<float>::max() / armnn::numeric_cast<float>(numElementsInAxis))); numElementsInAxis *= current; } - if (numElementsInAxis > 0) { - for (unsigned int idx = 0; idx < numOutputs; ++idx) + + for (unsigned int idx = 0; idx < numOutputs; ++idx) + { + output[idx]; + if (reduceOperation == ReduceOperation::Mean) { - output[idx]; - if (reduceOperation == ReduceOperation::Sum) - { - output.Set(tempSum[idx]); - } - else if (reduceOperation == ReduceOperation::Mean) + if (numElementsInAxis > 0) { - output.Set(tempSum[idx] / armnn::numeric_cast<float>(numElementsInAxis)); + output.Set(tempOut[idx] / armnn::numeric_cast<float>(numElementsInAxis)); } } + else + { + output.Set(tempOut[idx]); + } } } |