From 28dcab6c176a3938519809aa9da7321e4ede7623 Mon Sep 17 00:00:00 2001
From: Matteo Martincigh <matteo.martincigh@arm.com>
Date: Fri, 19 Oct 2018 16:40:03 +0100
Subject: IVGCVSW-2049 + IVGCVSW-2051 Create the CL Mean Float workload and add
 the unit tests

 * Created the ClMeanWorkload class
 * Added ClMeanValidate validation function
 * Added helper function to convert the reduction axes from the ArmNN
   format to ACL's
 * Added workload tests
 * Added some unit tests
 * These changes need the CL pin to be pointing at least to revision
   88d871028eeae57f9e4536d0329110eccb5e2890 (COMPMID-1574 Implement
   ReduceMean in OpenCL)

!android-nn-driver:155033

Change-Id: I694fd36be0458c90e158172afde045fcc88c32ae
---
 src/backends/cl/workloads/CMakeLists.txt     |   2 +
 src/backends/cl/workloads/ClMeanWorkload.cpp | 100 +++++++++++++++++++++++++++
 src/backends/cl/workloads/ClMeanWorkload.hpp |  31 +++++++++
 src/backends/cl/workloads/ClWorkloads.hpp    |   1 +
 4 files changed, 134 insertions(+)
 create mode 100644 src/backends/cl/workloads/ClMeanWorkload.cpp
 create mode 100644 src/backends/cl/workloads/ClMeanWorkload.hpp

(limited to 'src/backends/cl/workloads')
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 59a45facea..86c3804244 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -30,6 +30,8 @@ list(APPEND armnnClBackendWorkloads_sources
     ClL2NormalizationFloatWorkload.hpp
     ClLstmFloatWorkload.cpp
     ClLstmFloatWorkload.hpp
+    ClMeanWorkload.cpp
+    ClMeanWorkload.hpp
     ClMergerWorkload.hpp
     ClMultiplicationWorkload.cpp
     ClMultiplicationWorkload.hpp
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
new file mode 100644
index 0000000000..7e9649b1b6
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -0,0 +1,100 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMeanWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace
+{
+
+void ConvertArmnnAxesToAclCoordinates(size_t inputDimensions,                     // rank of the ACL input tensor
+                                      unsigned int originalInputRank,             // rank of the ArmNN input tensor
+                                      const std::vector<unsigned int>& armnnAxes, // ArmNN axes (may be empty)
+                                      arm_compute::Coordinates& outAclCoords)     // out: reduction axes for ACL
+{
+    if (armnnAxes.empty())
+    {
+        // If no reduction axes were provided, then the input must be reduced along all dimensions.
+        // Since arm_compute::CLReduceMean does not accept an empty vector as the reduction dimensions, we then
+        // manually create a vector including all the input dimensions (in reversed order) as:
+        //
+        // { inputDimensions - 1, inputDimensions - 2, ..., 1, 0 }
+        //
+        outAclCoords.set_num_dimensions(inputDimensions);
+        std::generate(outAclCoords.begin(), outAclCoords.end(), [d = inputDimensions - 1] () mutable { return d--; });
+    }
+    else
+    {
+        // Map every given ArmNN reduction axis to the matching ACL dimension index (ACL indexes in reversed order).
+        //
+        // Adjust the given reduction axes according to the original rank of the input tensor (before ACL applied any
+        // dimension correction).
+        // For example, if the input tensor originally had 4 dimensions, and one of the reduction axes was 2, then the
+        // new value for that reduction axis should be 1.
+        //
+        // Example:
+        // ArmNN input shape = { 1, 1, 3, 2 } -> ACL input shape = { 2, 3 }
+        // ArmNN reduction axis = { 2 }       -> ACL reduction axis = { 1 }
+        // ArmNN reduction axis = { 3 }       -> ACL reduction axis = { 0 }
+        //
+        // The transformation: ACL reduction axis index = original rank - ArmNN reduction axis index - 1
+        // NOTE(review): the arithmetic is unsigned, so an axis >= original rank wraps; assumed validated by callers.
+        outAclCoords.set_num_dimensions(armnnAxes.size()); // one ACL coordinate per requested axis
+        std::transform(armnnAxes.begin(), armnnAxes.end(),
+                       outAclCoords.begin(),
+                       [originalInputRank](unsigned int i){ return originalInputRank - i - 1; });
+    }
+}
+
+} // anonymous namespace
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,     // ArmNN info of the tensor to reduce
+                                   const TensorInfo& output,    // ArmNN info of the reduced tensor
+                                   const MeanDescriptor& desc)  // supplies m_Axis and m_KeepDims
+{
+    const arm_compute::TensorInfo aclInputInfo  = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::Coordinates coords; // filled below with the reduction axes in ACL's indexing
+    ConvertArmnnAxesToAclCoordinates(aclInputInfo.num_dimensions(), // rank of the ACL tensor info
+                                     input.GetNumDimensions(),      // rank of the ArmNN tensor info
+                                     desc.m_Axis,
+                                     coords);
+
+    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo); // ACL's verdict
+}
+
+ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1); // presumably enforces 1 input / 1 output - TODO confirm
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::Coordinates coords; // filled below with the reduction axes in ACL's indexing
+    ConvertArmnnAxesToAclCoordinates(input.info()->num_dimensions(),                // rank of the ACL tensor
+                                     info.m_InputTensorInfos[0].GetNumDimensions(), // rank of the ArmNN tensor
+                                     m_Data.m_Parameters.m_Axis,
+                                     coords);
+
+    m_Layer.configure(&input, coords, m_Data.m_Parameters.m_KeepDims, &output); // configured once, run in Execute()
+}
+
+void ClMeanWorkload::Execute() const // runs the CLReduceMean function held in m_Layer
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute"); // presumably a scoped profiling marker - TODO confirm
+    m_Layer.run(); // m_Layer was configured by the constructor
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
new file mode 100644
index 0000000000..c9f0356e04
--- /dev/null
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClMeanValidate(const TensorInfo& input,     // ArmNN info of the tensor to reduce
+                                   const TensorInfo& output,    // ArmNN info of the reduced tensor
+                                   const MeanDescriptor& desc); // supplies the reduction axes and keep-dims flag
+
+class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor> // Mean workload backed by arm_compute::CLReduceMean
+{
+public:
+    ClMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info); // configures m_Layer
+
+    void Execute() const override; // runs m_Layer
+
+private:
+    // Not using CLMeanStdDev, as 4D input tensor support for Mean has been added to a new function called CLReduceMean.
+    mutable arm_compute::CLReduceMean m_Layer; // mutable because run() is called from the const Execute()
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
index 63de744be5..eeca40364c 100644
--- a/src/backends/cl/workloads/ClWorkloads.hpp
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -16,6 +16,7 @@
 #include "ClL2NormalizationFloatWorkload.hpp"
 #include "ClLstmFloatWorkload.hpp"
 #include "ClMergerWorkload.hpp"
+#include "ClMeanWorkload.hpp"
 #include "ClMultiplicationWorkload.hpp"
 #include "ClNormalizationFloatWorkload.hpp"
 #include "ClPermuteWorkload.hpp"
-- 
cgit v1.2.1