Diffstat (limited to 'src/backends/cl')
-rw-r--r--  src/backends/cl/ClLayerSupport.cpp              |  11
-rw-r--r--  src/backends/cl/ClWorkloadFactory.cpp           |   2
-rw-r--r--  src/backends/cl/backend.mk                      |   1
-rw-r--r--  src/backends/cl/test/ClCreateWorkloadTests.cpp  |  34
-rwxr-xr-x  src/backends/cl/test/ClLayerTests.cpp           |  15
-rw-r--r--  src/backends/cl/workloads/CMakeLists.txt        |   2
-rw-r--r--  src/backends/cl/workloads/ClMeanWorkload.cpp    | 100
-rw-r--r--  src/backends/cl/workloads/ClMeanWorkload.hpp    |  31
-rw-r--r--  src/backends/cl/workloads/ClWorkloads.hpp       |   1
9 files changed, 189 insertions, 8 deletions
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 3ca8bb5c46..6c5704d7ab 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -26,6 +26,7 @@
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClL2NormalizationFloatWorkload.hpp"
#include "workloads/ClLstmFloatWorkload.hpp"
+#include "workloads/ClMeanWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClNormalizationFloatWorkload.hpp"
#include "workloads/ClPadWorkload.hpp"
@@ -372,11 +373,11 @@ bool ClLayerSupport::IsMeanSupported(const TensorInfo& input,
                                     const MeanDescriptor& descriptor,
                                     Optional<std::string&> reasonIfUnsupported) const
{
-    ignore_unused(input);
-    ignore_unused(output);
-    ignore_unused(descriptor);
-    ignore_unused(reasonIfUnsupported);
-    return false;
+    FORWARD_WORKLOAD_VALIDATE_FUNC(ClMeanValidate,
+                                   reasonIfUnsupported,
+                                   input,
+                                   output,
+                                   descriptor);
}

bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
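
The hunk above replaces the hard-coded "return false;" with a forward to ClMeanValidate, so the CL backend now reports Mean support based on what arm_compute::CLReduceMean::validate actually accepts. For readers unfamiliar with the pattern, here is a minimal sketch of the kind of helper a forwarding macro like FORWARD_WORKLOAD_VALIDATE_FUNC delegates to; this is an illustrative assumption, not ArmNN's actual definition.

    // Sketch only: adapts an arm_compute::Status-returning validate function
    // to the bool-plus-reason contract of the IsXxxSupported methods.
    // Assumes <utility> plus the ArmNN/ACL headers included above.
    template <typename Func, typename... Args>
    bool IsWorkloadSupportedSketch(Func&& validateFunc,
                                   armnn::Optional<std::string&> reasonIfUnsupported,
                                   Args&&... args)
    {
        const arm_compute::Status status = validateFunc(std::forward<Args>(args)...);
        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported.has_value())
        {
            // Surface ACL's error text, e.g. an unsupported axis combination.
            reasonIfUnsupported.value() = status.error_description();
        }
        return supported;
    }
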
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index fd92db34d5..08ee9e922d 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -303,7 +303,7 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
std::unique_ptr<IWorkload> ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor,
                                                         const WorkloadInfo& info) const
{
-    return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info);
+    return std::make_unique<ClMeanWorkload>(descriptor, info);
}

std::unique_ptr<IWorkload> ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor,
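
Previously CreateMean returned MakeWorkload<NullWorkload, NullWorkload>(...), ArmNN's idiom for "no implementation for any data type", so no Mean workload could be created on this backend. Unlike many CL workloads, which come in Float and Uint8 variants selected by MakeWorkload, ClMeanWorkload covers Float16, Float32 and QuantisedAsymm8 in a single class (the new tests below exercise all three), which is why a plain std::make_unique suffices. A hedged sketch of the equivalent free function:

    // Sketch for illustration; the real code is the member function above.
    std::unique_ptr<armnn::IWorkload> CreateMeanSketch(
        const armnn::MeanQueueDescriptor& descriptor,
        const armnn::WorkloadInfo& info)
    {
        // One class handles all supported data types, so no per-type
        // dispatch (MakeWorkload<FloatWorkload, Uint8Workload>) is needed.
        return std::make_unique<armnn::ClMeanWorkload>(descriptor, info);
    }
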
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index 996db3fbfd..97df8e4903 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -26,6 +26,7 @@ BACKEND_SOURCES := \
        workloads/ClFullyConnectedWorkload.cpp \
        workloads/ClL2NormalizationFloatWorkload.cpp \
        workloads/ClLstmFloatWorkload.cpp \
+       workloads/ClMeanWorkload.cpp \
        workloads/ClMultiplicationWorkload.cpp \
        workloads/ClNormalizationFloatWorkload.cpp \
        workloads/ClPadWorkload.cpp \
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 4f9989405d..2a705de99b 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -14,8 +14,6 @@
#include <backends/cl/workloads/ClWorkloads.hpp>
#include <backends/cl/workloads/ClWorkloadUtils.hpp>
-#include <backends/reference/RefWorkloadFactory.hpp>
-
boost::test_tools::predicate_result CompareIClTensorHandleShape(IClTensorHandle* tensorHandle,
std::initializer_list<unsigned int> expectedDimensions)
{
@@ -739,4 +737,36 @@ BOOST_AUTO_TEST_CASE(CreateResizeBilinearFloat16NhwcWorkload)
    ClResizeBilinearWorkloadTest<ClResizeBilinearFloatWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
}

+template <typename MeanWorkloadType, typename armnn::DataType DataType>
+static void ClMeanWorkloadTest()
+{
+    Graph graph;
+    ClWorkloadFactory factory;
+    auto workload = CreateMeanWorkloadTest<MeanWorkloadType, DataType>(factory, graph);
+
+    // Checks that inputs/outputs are as we expect them (see definition of CreateMeanWorkloadTest).
+    MeanQueueDescriptor queueDescriptor = workload->GetData();
+    auto inputHandle  = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
+    auto outputHandle = boost::polymorphic_downcast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
+
+    // The first dimension (batch size) in both input and output is singular, and has therefore been reduced by ACL.
+    BOOST_TEST(CompareIClTensorHandleShape(inputHandle, { 3, 7, 4 }));
+    BOOST_TEST(CompareIClTensorHandleShape(outputHandle, { 4 }));
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanFloat32Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float32>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanFloat16Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::Float16>();
+}
+
+BOOST_AUTO_TEST_CASE(CreateMeanUint8Workload)
+{
+    ClMeanWorkloadTest<ClMeanWorkload, armnn::DataType::QuantisedAsymm8>();
+}
+
BOOST_AUTO_TEST_SUITE_END()
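
The shape assertions in the test above encode an ACL interop subtlety: when ACL imports a tensor it applies a dimension correction that drops degenerate leading dimensions, so a rank-4 ArmNN tensor can surface as a rank-3 handle. Assuming the fixture builds a { 1, 3, 7, 4 } input reduced to a { 1, 4 } output (inferred from the assertions, not stated in this diff), the mapping is { 1, 3, 7, 4 } -> { 3, 7, 4 } and { 1, 4 } -> { 4 }. A minimal stand-in for that correction:

    #include <vector>

    // Hedged sketch: collapse leading size-1 dimensions, the behaviour the
    // test above observes from ACL's dimension correction.
    std::vector<unsigned int> CollapseLeadingOnes(std::vector<unsigned int> shape)
    {
        while (shape.size() > 1 && shape.front() == 1)
        {
            shape.erase(shape.begin());
        }
        return shape;
    }
    // CollapseLeadingOnes({ 1, 3, 7, 4 }) -> { 3, 7, 4 }
    // CollapseLeadingOnes({ 1, 4 })       -> { 4 }
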
diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp
index d5e941977a..937c58c689 100755
--- a/src/backends/cl/test/ClLayerTests.cpp
+++ b/src/backends/cl/test/ClLayerTests.cpp
@@ -274,6 +274,21 @@ ARMNN_AUTO_TEST_CASE(SimpleConvertFp32ToFp16, SimpleConvertFp32ToFp16Test)
ARMNN_AUTO_TEST_CASE(AdditionAfterMaxPool, AdditionAfterMaxPoolTest)
+// Mean
+ARMNN_AUTO_TEST_CASE(MeanUint8Simple, MeanUint8SimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8SimpleAxis, MeanUint8SimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8KeepDims, MeanUint8KeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanUint8MultipleDims, MeanUint8MultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsUint8, MeanVtsUint8Test)
+
+ARMNN_AUTO_TEST_CASE(MeanFloatSimple, MeanFloatSimpleTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatSimpleAxis, MeanFloatSimpleAxisTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatKeepDims, MeanFloatKeepDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanFloatMultipleDims, MeanFloatMultipleDimsTest)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat1, MeanVtsFloat1Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat2, MeanVtsFloat2Test)
+ARMNN_AUTO_TEST_CASE(MeanVtsFloat3, MeanVtsFloat3Test)
+
// ============================================================================
// COMPARE tests
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 59a45facea..86c3804244 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -30,6 +30,8 @@ list(APPEND armnnClBackendWorkloads_sources
    ClL2NormalizationFloatWorkload.hpp
    ClLstmFloatWorkload.cpp
    ClLstmFloatWorkload.hpp
+    ClMeanWorkload.cpp
+    ClMeanWorkload.hpp
    ClMergerWorkload.hpp
    ClMultiplicationWorkload.cpp
    ClMultiplicationWorkload.hpp
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
new file mode 100644
index 0000000000..7e9649b1b6
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMeanWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace
+{
+
+void ConvertArmnnAxesToAclCoordinates(size_t inputDimensions,
+                                      unsigned int originalInputRank,
+                                      const std::vector<unsigned int>& armnnAxes,
+                                      arm_compute::Coordinates& outAclCoords)
+{
+    if (armnnAxes.empty())
+    {
+        // If no reduction axes were provided, then the input must be reduced along all dimensions.
+        // Since arm_compute::CLReduceMean does not accept an empty vector as the reduction dimensions, we then
+        // manually create a vector including all the input dimensions (in reversed order) as:
+        //
+        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
+        //
+        outAclCoords.set_num_dimensions(inputDimensions);
+        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
+    }
+    else
+    {
+        // Create a vector of reduction dimensions (in reversed order) with the given reduction axes.
+        //
+        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
+        // dimension correction).
+        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
+        // new value for that reduction axis should be 1.
+        //
+        // Example:
+        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
+        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
+        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
+        //
+        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
+        //
+        outAclCoords.set_num_dimensions(armnnAxes.size());
+        std::transform(armnnAxes.begin(), armnnAxes.end(),
+                       outAclCoords.begin(),
+                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
+    }
+}
+
+} // anonymous namespace
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const MeanDescriptor& desc)
+{
+    const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::Coordinates coords;
+    ConvertArmnnAxesToAclCoordinates(aclInputInfo.num_dimensions(),
+                                     input.GetNumDimensions(),
+                                     desc.m_Axis,
+                                     coords);
+
+    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+}
+
+ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::Coordinates coords;
+    ConvertArmnnAxesToAclCoordinates(input.info()->num_dimensions(),
+                                     info.m_InputTensorInfos[0].GetNumDimensions(),
+                                     m_Data.m_Parameters.m_Axis,
+                                     coords);
+
+    m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+}
+
+void ClMeanWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
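
Because ConvertArmnnAxesToAclCoordinates is pure index arithmetic, it can be sanity-checked without an OpenCL device. The sketch below mirrors it with std::vector in place of arm_compute::Coordinates and reproduces the worked example from the comments; it is a stand-in for illustration, not the shipped helper.

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Mirrors ConvertArmnnAxesToAclCoordinates: inputDimensions is the rank
    // after ACL's dimension correction, originalInputRank the rank before it.
    std::vector<unsigned int> ArmnnAxesToAclAxes(std::size_t inputDimensions,
                                                 unsigned int originalInputRank,
                                                 const std::vector<unsigned int>& armnnAxes)
    {
        std::vector<unsigned int> aclAxes;
        if (armnnAxes.empty())
        {
            // Reduce along every (corrected) dimension, highest index first.
            aclAxes.resize(inputDimensions);
            std::generate(aclAxes.begin(), aclAxes.end(),
                          [d = inputDimensions - 1]() mutable { return static_cast<unsigned int>(d--); });
        }
        else
        {
            // ACL orders dimensions fastest-moving first, hence the reversal.
            aclAxes.resize(armnnAxes.size());
            std::transform(armnnAxes.begin(), armnnAxes.end(), aclAxes.begin(),
                           [originalInputRank](unsigned int i) { return originalInputRank - i - 1; });
        }
        return aclAxes;
    }

    // Reproducing the example from the comments above:
    //   ArmnnAxesToAclAxes(2, 4, { 2 }) == { 1 }   (ArmNN { 1, 1, 3, 2 } -> ACL { 2, 3 })
    //   ArmnnAxesToAclAxes(2, 4, { 3 }) == { 0 }
    //   ArmnnAxesToAclAxes(3, 3, {})    == { 2, 1, 0 }
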
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
new file mode 100644
index 0000000000..c9f0356e04
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,
+                                   const TensorInfo& output,
+                                   const MeanDescriptor& desc);
+
+class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
+{
+public:
+    ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    // Not using CLMeanStdDev, as 4D input tensor support for Mean has been added to a new function called CLReduceMean.
+    mutable arm_compute::CLReduceMean m_Layer;
+};
+
+} //namespace armnn
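
Two details of this header are worth noting. ClMeanValidate is the same check that ClLayerSupport::IsMeanSupported now forwards to, so graph-time support queries and the workload agree on what CLReduceMean accepts. And m_Layer is mutable because Execute() is const while arm_compute::IFunction::run() is not. A hedged sketch of the intended call sequence, with all tensor setup omitted and the surrounding objects assumed:

    // 1. Graph-time check (what IsMeanSupported forwards to).
    const arm_compute::Status status = armnn::ClMeanValidate(inputInfo, outputInfo, meanDescriptor);
    if (status.error_code() == arm_compute::ErrorCode::OK)
    {
        // 2. Construction configures the underlying CLReduceMean.
        armnn::ClMeanWorkload workload(queueDescriptor, workloadInfo);
        // 3. Inference calls run() on the configured function.
        workload.Execute();
    }
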
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index 63de744be5..eeca40364c 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -16,6 +16,7 @@
#include "ClL2NormalizationFloatWorkload.hpp"
#include "ClLstmFloatWorkload.hpp"
#include "ClMergerWorkload.hpp"
+#include "ClMeanWorkload.hpp"
#include "ClMultiplicationWorkload.hpp"
#include "ClNormalizationFloatWorkload.hpp"
#include "ClPermuteWorkload.hpp"