Diffstat (limited to 'src/backends/cl/workloads')
-rw-r--r--  src/backends/cl/workloads/CMakeLists.txt  92
-rw-r--r--  src/backends/cl/workloads/ClActivationFloatWorkload.cpp  56
-rw-r--r--  src/backends/cl/workloads/ClActivationFloatWorkload.hpp  29
-rw-r--r--  src/backends/cl/workloads/ClActivationUint8Workload.cpp  44
-rw-r--r--  src/backends/cl/workloads/ClActivationUint8Workload.hpp  29
-rw-r--r--  src/backends/cl/workloads/ClAdditionWorkload.cpp  66
-rw-r--r--  src/backends/cl/workloads/ClAdditionWorkload.hpp  31
-rw-r--r--  src/backends/cl/workloads/ClBaseConstantWorkload.cpp  64
-rw-r--r--  src/backends/cl/workloads/ClBaseConstantWorkload.hpp  30
-rw-r--r--  src/backends/cl/workloads/ClBaseMergerWorkload.hpp  28
-rw-r--r--  src/backends/cl/workloads/ClBaseSplitterWorkload.hpp  28
-rw-r--r--  src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp  96
-rw-r--r--  src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp  46
-rw-r--r--  src/backends/cl/workloads/ClConstantFloatWorkload.cpp  18
-rw-r--r--  src/backends/cl/workloads/ClConstantFloatWorkload.hpp  20
-rw-r--r--  src/backends/cl/workloads/ClConstantUint8Workload.cpp  18
-rw-r--r--  src/backends/cl/workloads/ClConstantUint8Workload.hpp  20
-rw-r--r--  src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp  66
-rw-r--r--  src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp  30
-rw-r--r--  src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp  66
-rw-r--r--  src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp  30
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp  48
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp  24
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp  81
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp  35
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp  81
-rw-r--r--  src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp  35
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp  125
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp  40
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp  39
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp  26
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp  40
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp  23
-rw-r--r--  src/backends/cl/workloads/ClDivisionFloatWorkload.cpp  48
-rw-r--r--  src/backends/cl/workloads/ClDivisionFloatWorkload.hpp  32
-rw-r--r--  src/backends/cl/workloads/ClFloorFloatWorkload.cpp  31
-rw-r--r--  src/backends/cl/workloads/ClFloorFloatWorkload.hpp  30
-rw-r--r--  src/backends/cl/workloads/ClFullyConnectedWorkload.cpp  96
-rw-r--r--  src/backends/cl/workloads/ClFullyConnectedWorkload.hpp  43
-rw-r--r--  src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp  50
-rw-r--r--  src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp  35
-rw-r--r--  src/backends/cl/workloads/ClLstmFloatWorkload.cpp  391
-rw-r--r--  src/backends/cl/workloads/ClLstmFloatWorkload.hpp  68
-rw-r--r--  src/backends/cl/workloads/ClMergerFloatWorkload.cpp  20
-rw-r--r--  src/backends/cl/workloads/ClMergerFloatWorkload.hpp  22
-rw-r--r--  src/backends/cl/workloads/ClMergerUint8Workload.cpp  19
-rw-r--r--  src/backends/cl/workloads/ClMergerUint8Workload.hpp  21
-rw-r--r--  src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp  60
-rw-r--r--  src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp  34
-rw-r--r--  src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp  51
-rw-r--r--  src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp  29
-rw-r--r--  src/backends/cl/workloads/ClPadWorkload.cpp  63
-rw-r--r--  src/backends/cl/workloads/ClPadWorkload.hpp  32
-rw-r--r--  src/backends/cl/workloads/ClPermuteWorkload.cpp  56
-rw-r--r--  src/backends/cl/workloads/ClPermuteWorkload.hpp  42
-rw-r--r--  src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp  47
-rw-r--r--  src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp  33
-rw-r--r--  src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp  26
-rw-r--r--  src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp  22
-rw-r--r--  src/backends/cl/workloads/ClPooling2dUint8Workload.cpp  27
-rw-r--r--  src/backends/cl/workloads/ClPooling2dUint8Workload.hpp  25
-rw-r--r--  src/backends/cl/workloads/ClReshapeFloatWorkload.cpp  33
-rw-r--r--  src/backends/cl/workloads/ClReshapeFloatWorkload.hpp  28
-rw-r--r--  src/backends/cl/workloads/ClReshapeUint8Workload.cpp  31
-rw-r--r--  src/backends/cl/workloads/ClReshapeUint8Workload.hpp  29
-rw-r--r--  src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp  38
-rw-r--r--  src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp  25
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp  30
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp  17
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp  33
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp  30
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp  43
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp  31
-rw-r--r--  src/backends/cl/workloads/ClSplitterFloatWorkload.cpp  19
-rw-r--r--  src/backends/cl/workloads/ClSplitterFloatWorkload.hpp  20
-rw-r--r--  src/backends/cl/workloads/ClSplitterUint8Workload.cpp  19
-rw-r--r--  src/backends/cl/workloads/ClSplitterUint8Workload.hpp  21
-rw-r--r--  src/backends/cl/workloads/ClSubtractionWorkload.cpp  66
-rw-r--r--  src/backends/cl/workloads/ClSubtractionWorkload.hpp  31
-rw-r--r--  src/backends/cl/workloads/ClWorkloadUtils.hpp  63
-rw-r--r--  src/backends/cl/workloads/ClWorkloads.hpp  41
81 files changed, 3605 insertions, 0 deletions
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
new file mode 100644
index 0000000000..066c37f083
--- /dev/null
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -0,0 +1,92 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+list(APPEND armnnClBackendWorkloads_sources
+ ClActivationFloatWorkload.cpp
+ ClActivationFloatWorkload.hpp
+ ClActivationUint8Workload.cpp
+ ClActivationUint8Workload.hpp
+ ClAdditionWorkload.cpp
+ ClAdditionWorkload.hpp
+ ClBaseConstantWorkload.cpp
+ ClBaseConstantWorkload.hpp
+ ClBaseMergerWorkload.hpp
+ ClBaseSplitterWorkload.hpp
+ ClBatchNormalizationFloatWorkload.cpp
+ ClBatchNormalizationFloatWorkload.hpp
+ ClConstantFloatWorkload.cpp
+ ClConstantFloatWorkload.hpp
+ ClConstantUint8Workload.cpp
+ ClConstantUint8Workload.hpp
+ ClConvertFp16ToFp32Workload.cpp
+ ClConvertFp16ToFp32Workload.hpp
+ ClConvertFp32ToFp16Workload.cpp
+ ClConvertFp32ToFp16Workload.hpp
+ ClConvolution2dBaseWorkload.cpp
+ ClConvolution2dBaseWorkload.hpp
+ ClConvolution2dFloatWorkload.cpp
+ ClConvolution2dFloatWorkload.hpp
+ ClConvolution2dUint8Workload.cpp
+ ClConvolution2dUint8Workload.hpp
+ ClDepthwiseConvolutionBaseWorkload.cpp
+ ClDepthwiseConvolutionBaseWorkload.hpp
+ ClDepthwiseConvolutionFloatWorkload.cpp
+ ClDepthwiseConvolutionFloatWorkload.hpp
+ ClDepthwiseConvolutionUint8Workload.cpp
+ ClDepthwiseConvolutionUint8Workload.hpp
+ ClDivisionFloatWorkload.cpp
+ ClDivisionFloatWorkload.hpp
+ ClFloorFloatWorkload.cpp
+ ClFloorFloatWorkload.hpp
+ ClFullyConnectedWorkload.cpp
+ ClFullyConnectedWorkload.hpp
+ ClL2NormalizationFloatWorkload.cpp
+ ClL2NormalizationFloatWorkload.hpp
+ ClLstmFloatWorkload.cpp
+ ClLstmFloatWorkload.hpp
+ ClMergerFloatWorkload.cpp
+ ClMergerFloatWorkload.hpp
+ ClMergerUint8Workload.cpp
+ ClMergerUint8Workload.hpp
+ ClMultiplicationFloatWorkload.cpp
+ ClMultiplicationFloatWorkload.hpp
+ ClNormalizationFloatWorkload.cpp
+ ClNormalizationFloatWorkload.hpp
+ ClPadWorkload.cpp
+ ClPadWorkload.hpp
+ ClPermuteWorkload.cpp
+ ClPermuteWorkload.hpp
+ ClPooling2dBaseWorkload.cpp
+ ClPooling2dBaseWorkload.hpp
+ ClPooling2dFloatWorkload.cpp
+ ClPooling2dFloatWorkload.hpp
+ ClPooling2dUint8Workload.cpp
+ ClPooling2dUint8Workload.hpp
+ ClReshapeFloatWorkload.cpp
+ ClReshapeFloatWorkload.hpp
+ ClReshapeUint8Workload.cpp
+ ClReshapeUint8Workload.hpp
+ ClResizeBilinearFloatWorkload.cpp
+ ClResizeBilinearFloatWorkload.hpp
+ ClSoftmaxBaseWorkload.cpp
+ ClSoftmaxBaseWorkload.hpp
+ ClSoftmaxFloatWorkload.cpp
+ ClSoftmaxFloatWorkload.hpp
+ ClSoftmaxUint8Workload.cpp
+ ClSoftmaxUint8Workload.hpp
+ ClSplitterFloatWorkload.cpp
+ ClSplitterFloatWorkload.hpp
+ ClSplitterUint8Workload.cpp
+ ClSplitterUint8Workload.hpp
+ ClSubtractionWorkload.cpp
+ ClSubtractionWorkload.hpp
+ ClWorkloads.hpp
+ ClWorkloadUtils.hpp
+)
+
+add_library(armnnClBackendWorkloads STATIC ${armnnClBackendWorkloads_sources})
+target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src)
+target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
+target_include_directories(armnnClBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.cpp b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp
new file mode 100644
index 0000000000..cbaac9d226
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationFloatWorkload.cpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClActivationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
+
+ if (input.GetDataType() == DataType::QuantisedAsymm8 &&
+ activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "CL: Logistic Activations unsupported with QAsymm8 data type."};
+ }
+
+ return arm_compute::CLActivationLayer::validate(&aclInput,
+ &aclOutput,
+ activationLayerInfo);
+}
+
+ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<ActivationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+}
+
+void ClActivationFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute");
+ m_ActivationLayer.run();
+}
+
+} //namespace armnn
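
The validate function above shows the pattern shared by most workloads in this patch: armnn TensorInfo objects are translated into arm_compute::TensorInfo and the corresponding CL layer's static validate() performs the real check. A minimal caller-side sketch of turning that arm_compute::Status into a bool plus reason string; the IsClActivationSupported wrapper is hypothetical and not part of this change:

    // Hypothetical wrapper around the validate function added above; it relies only on
    // arm_compute::Status::error_code()/error_description(), which this patch itself uses.
    #include "ClActivationFloatWorkload.hpp"
    #include <string>

    bool IsClActivationSupported(const armnn::TensorInfo& input,
                                 const armnn::TensorInfo& output,
                                 const armnn::ActivationDescriptor& descriptor,
                                 std::string* reasonIfUnsupported)
    {
        const arm_compute::Status status = armnn::ClActivationWorkloadValidate(input, output, descriptor);
        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported)
        {
            *reasonIfUnsupported = status.error_description();
        }
        return supported;
    }
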
diff --git a/src/backends/cl/workloads/ClActivationFloatWorkload.hpp b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp
new file mode 100644
index 0000000000..cb560a791b
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationFloatWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor);
+
+// Activation layer execution.
+class ClActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor>
+{
+public:
+ ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.cpp b/src/backends/cl/workloads/ClActivationUint8Workload.cpp
new file mode 100644
index 0000000000..ad6b73074b
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationUint8Workload.cpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClActivationUint8Workload.hpp"
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+ auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
+ arm_compute::ActivationLayerInfo layerInfo(activation,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+
+ m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void ClActivationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute");
+
+ m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClActivationUint8Workload.hpp b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
new file mode 100644
index 0000000000..d0b7d3a78f
--- /dev/null
+++ b/src/backends/cl/workloads/ClActivationUint8Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+// Activation layer execution.
+class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+ ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
new file mode 100644
index 0000000000..aa032e872c
--- /dev/null
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClAdditionWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... T>
+ClAdditionWorkload<T...>::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<AdditionQueueDescriptor, T...>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType... T>
+void ClAdditionWorkload<T...>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute");
+ m_Layer.run();
+}
+
+bool ClAdditionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
+ &aclInput1Info,
+ &aclOutputInfo,
+ g_AclConvertPolicy);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return supported;
+}
+
+} //namespace armnn
+
+template class armnn::ClAdditionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>;
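
ClAdditionWorkload is a class template over the supported data types, and only the two explicit instantiations above are compiled into the library. A sketch of how a workload factory might select between them at runtime; the MakeClAdditionWorkload helper is illustrative only and assumes WorkloadInfo exposes the input TensorInfo objects:

    // Illustrative factory: picks the explicitly instantiated ClAdditionWorkload that
    // matches the input data type. Any type not instantiated above would fail to link.
    #include "ClAdditionWorkload.hpp"
    #include <memory>

    std::unique_ptr<armnn::IWorkload> MakeClAdditionWorkload(const armnn::AdditionQueueDescriptor& descriptor,
                                                             const armnn::WorkloadInfo& info)
    {
        switch (info.m_InputTensorInfos[0].GetDataType())
        {
            case armnn::DataType::Float16:
            case armnn::DataType::Float32:
                return std::make_unique<armnn::ClAdditionWorkload<armnn::DataType::Float16,
                                                                  armnn::DataType::Float32>>(descriptor, info);
            case armnn::DataType::QuantisedAsymm8:
                return std::make_unique<armnn::ClAdditionWorkload<armnn::DataType::QuantisedAsymm8>>(descriptor, info);
            default:
                return nullptr; // data type not handled by this backend
        }
    }
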
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp
new file mode 100644
index 0000000000..3e4ee26793
--- /dev/null
+++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClAdditionWorkload : public TypedWorkload<AdditionQueueDescriptor, dataTypes...>
+{
+public:
+ ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLArithmeticAddition m_Layer;
+};
+
+bool ClAdditionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.cpp b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp
new file mode 100644
index 0000000000..2557020b59
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseConstantWorkload.cpp
@@ -0,0 +1,64 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClBaseConstantWorkload.hpp"
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <Half.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+template class ClBaseConstantWorkload<DataType::Float16, DataType::Float32>;
+template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>;
+
+template<armnn::DataType... dataTypes>
+void ClBaseConstantWorkload<dataTypes...>::Execute() const
+{
+ // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
+ // on the first inference, then reused for subsequent inferences.
+ // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not
+ // have been configured at the time.
+ if (!m_RanOnce)
+ {
+ const ConstantQueueDescriptor& data = this->m_Data;
+
+ BOOST_ASSERT(data.m_LayerOutput != nullptr);
+ arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+ arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType();
+
+ switch (computeDataType)
+ {
+ case arm_compute::DataType::F16:
+ {
+ CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<Half>());
+ break;
+ }
+ case arm_compute::DataType::F32:
+ {
+ CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<float>());
+ break;
+ }
+ case arm_compute::DataType::QASYMM8:
+ {
+ CopyArmComputeClTensorData(output, data.m_LayerOutput->GetConstTensor<uint8_t>());
+ break;
+ }
+ default:
+ {
+ BOOST_ASSERT_MSG(false, "Unknown data type");
+ break;
+ }
+ }
+
+ m_RanOnce = true;
+ }
+}
+
+
+} //namespace armnn
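
The constant workload defers copying its data until the first Execute() call because the ACL kernel of the following layer may not be configured at construction time; the mutable m_RanOnce flag makes the copy happen exactly once. The same run-once pattern in isolation, with illustrative names and no CL dependencies:

    #include <iostream>

    // Minimal sketch of the run-once initialisation used by ClBaseConstantWorkload:
    // Execute() is const, so the one-shot flag is mutable and flipped on first use.
    class RunOnceWorkload
    {
    public:
        void Execute() const
        {
            if (!m_RanOnce)
            {
                std::cout << "initialise output tensor with constant data\n"; // first call only
                m_RanOnce = true;
            }
            std::cout << "run inference\n"; // every call
        }

    private:
        mutable bool m_RanOnce = false;
    };

    int main()
    {
        RunOnceWorkload workload;
        workload.Execute(); // initialises, then runs
        workload.Execute(); // runs only
    }
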
diff --git a/src/backends/cl/workloads/ClBaseConstantWorkload.hpp b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..f7a23a9162
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseConstantWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+template <armnn::DataType... DataTypes>
+class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataTypes...>
+{
+public:
+ ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : TypedWorkload<ConstantQueueDescriptor, DataTypes...>(descriptor, info)
+ , m_RanOnce(false)
+ {
+ }
+
+ void Execute() const override;
+
+private:
+ mutable bool m_RanOnce;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseMergerWorkload.hpp b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp
new file mode 100644
index 0000000000..f8ff6f9379
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseMergerWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Merger layer common to all data types.
+template <armnn::DataType... DataTypes>
+class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...>
+{
+public:
+ using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload;
+
+ void Execute() const override
+ {
+ // With subtensors, merger is a no-op.
+ }
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp
new file mode 100644
index 0000000000..7fdcc84235
--- /dev/null
+++ b/src/backends/cl/workloads/ClBaseSplitterWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Splitter layer common to all data types.
+template <armnn::DataType... DataTypes>
+class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...>
+{
+public:
+ using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload;
+
+ void Execute() const override
+ {
+        // With subtensors, splitter is a no-op.
+ }
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..5bff7a63c9
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClBatchNormalizationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor &desc)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean);
+ const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var);
+ const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta);
+ const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma);
+
+ return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
+ &aclOutputInfo,
+ &aclMeanInfo,
+ &aclVarInfo,
+ &aclBetaInfo,
+ &aclGammaInfo,
+ desc.m_Eps);
+}
+
+ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Mean = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
+
+ m_Variance = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo());
+
+ m_Gamma = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+
+ m_Beta = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+ m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input,
+ &output,
+ m_Mean.get(),
+ m_Variance.get(),
+ m_Beta.get(),
+ m_Gamma.get(),
+ m_Data.m_Parameters.m_Eps);
+
+ InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
+ InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
+ InitializeArmComputeClTensorData(*m_Beta, m_Data.m_Beta);
+ InitializeArmComputeClTensorData(*m_Gamma, m_Data.m_Gamma);
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_Layer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClBatchNormalizationFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute");
+ m_Layer.run();
+}
+
+void ClBatchNormalizationFloatWorkload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_Mean);
+ FreeTensorIfUnused(m_Variance);
+ FreeTensorIfUnused(m_Gamma);
+ FreeTensorIfUnused(m_Beta);
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..804591c444
--- /dev/null
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor& desc);
+
+class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
+{
+public:
+ ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLBatchNormalizationLayer m_Layer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_Mean;
+ std::unique_ptr<arm_compute::CLTensor> m_Variance;
+ std::unique_ptr<arm_compute::CLTensor> m_Gamma;
+ std::unique_ptr<arm_compute::CLTensor> m_Beta;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.cpp b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp
new file mode 100644
index 0000000000..1565047c22
--- /dev/null
+++ b/src/backends/cl/workloads/ClConstantFloatWorkload.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConstantFloatWorkload.hpp"
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClConstantFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute");
+ ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConstantFloatWorkload.hpp b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp
new file mode 100644
index 0000000000..0cbeaad9ea
--- /dev/null
+++ b/src/backends/cl/workloads/ClConstantFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+class ClConstantFloatWorkload : public ClBaseConstantWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+ using ClBaseConstantWorkload<DataType::Float16, DataType::Float32>::ClBaseConstantWorkload;
+ void Execute() const override;
+};
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.cpp b/src/backends/cl/workloads/ClConstantUint8Workload.cpp
new file mode 100644
index 0000000000..a5ef0321cd
--- /dev/null
+++ b/src/backends/cl/workloads/ClConstantUint8Workload.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConstantUint8Workload.hpp"
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClConstantUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute");
+ ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConstantUint8Workload.hpp b/src/backends/cl/workloads/ClConstantUint8Workload.hpp
new file mode 100644
index 0000000000..30556dc0d6
--- /dev/null
+++ b/src/backends/cl/workloads/ClConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload;
+ void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..e7663b4ca4
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvertFp16ToFp32Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
+ const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp16ToFp32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+ m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ if (input.GetDataType() != DataType::Float16)
+ {
+ *reasonIfUnsupported = "Input should be Float16";
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+ }
+ if (output.GetDataType() != DataType::Float32)
+ {
+ *reasonIfUnsupported = "Output should be Float32";
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+ &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return aclStatus;
+}
+
+
+} //namespace armnn
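
Note that the two conversion validators write to reasonIfUnsupported in their early-exit paths before the final null check, so they are effectively written for callers that always pass a valid string. A short usage sketch under that assumption; the CanConvertFp16ToFp32OnCl helper is hypothetical:

    // Hypothetical caller: always supplies a reason string, since the data-type checks
    // above dereference the pointer unconditionally.
    #include "ClConvertFp16ToFp32Workload.hpp"
    #include <string>

    bool CanConvertFp16ToFp32OnCl(const armnn::TensorInfo& input, const armnn::TensorInfo& output)
    {
        std::string reason;
        const arm_compute::Status status = armnn::ClConvertFp16ToFp32WorkloadValidate(input, output, &reason);
        // On failure, reason holds either the data-type message or the ACL error description.
        return status.error_code() == arm_compute::ErrorCode::OK;
    }
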
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..b6447488f7
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+
+ ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..2ae4adc424
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvertFp32ToFp16Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
+ const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp32ToFp16Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+ m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ if (input.GetDataType() != DataType::Float32)
+ {
+ *reasonIfUnsupported = "Input should be Float32";
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+ }
+ if (output.GetDataType() != DataType::Float16)
+ {
+ *reasonIfUnsupported = "Output should be Float16";
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported);
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+ &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return aclStatus;
+}
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..95d19905d7
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+
+ ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp
new file mode 100644
index 0000000000..58699a8287
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.cpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvolution2dBaseWorkload.hpp"
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLConvolutionLayer.h>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const boost::optional<TensorInfo>& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+ if (descriptor.m_BiasEnabled)
+ {
+ BOOST_ASSERT(biases.is_initialized());
+
+ aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
+
+ return arm_compute::CLConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ layerInfo);
+}
+
+}
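
The bias TensorInfo is passed as a boost::optional and is only read when the descriptor enables biases, in which case it must be initialised. A short illustrative caller showing both call shapes; ValidateConv2d is not part of this change:

    // Illustrative caller of ClConvolution2dWorkloadValidate: the optional bias info is
    // populated only when biases are enabled; otherwise an empty optional is passed and
    // the validator never touches it.
    #include "ClConvolution2dBaseWorkload.hpp"
    #include <boost/optional.hpp>

    arm_compute::Status ValidateConv2d(const armnn::TensorInfo& input,
                                       const armnn::TensorInfo& output,
                                       const armnn::Convolution2dDescriptor& descriptor,
                                       const armnn::TensorInfo& weights,
                                       const armnn::TensorInfo* biases)
    {
        boost::optional<armnn::TensorInfo> optionalBiases;
        if (descriptor.m_BiasEnabled)
        {
            optionalBiases = *biases; // must be non-null when biases are enabled
        }
        return armnn::ClConvolution2dWorkloadValidate(input, output, descriptor, weights, optionalBiases);
    }
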
diff --git a/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp
new file mode 100644
index 0000000000..a983dba79a
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dBaseWorkload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <armnn/Descriptors.hpp>
+
+#include <boost/optional.hpp>
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Convolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const boost::optional<TensorInfo>& biases);
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp
new file mode 100644
index 0000000000..813808345e
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.cpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvolution2dFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : FloatWorkload<Convolution2dQueueDescriptor>(descriptor, info)
+ , m_ConvolutionLayer(memoryManager)
+{
+
+ // todo: check tensor shapes match.
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout);
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout);
+ }
+
+ m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ConvolutionLayer.configure(&input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo);
+
+ InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+
+ if (m_BiasTensor)
+ {
+ InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
+ }
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_ConvolutionLayer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClConvolution2dFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute");
+
+ m_ConvolutionLayer.run();
+}
+
+void ClConvolution2dFloatWorkload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp
new file mode 100644
index 0000000000..1f9710e1ea
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dFloatWorkload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+
+class ClConvolution2dFloatWorkload : public FloatWorkload<Convolution2dQueueDescriptor>
+{
+public:
+ ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..d9b9dfd833
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.cpp
@@ -0,0 +1,81 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClConvolution2dUint8Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info)
+ , m_ConvolutionLayer(memoryManager)
+{
+ // todo: check tensor shapes match
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout);
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout);
+ }
+
+ m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ConvolutionLayer.configure(&input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo);
+
+ InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+
+ if (m_BiasTensor)
+ {
+ InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
+ }
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_ConvolutionLayer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute");
+
+ m_ConvolutionLayer.run();
+}
+
+void ClConvolution2dUint8Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..1720ec935c
--- /dev/null
+++ b/src/backends/cl/workloads/ClConvolution2dUint8Workload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+
+class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
+{
+public:
+ ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp
new file mode 100644
index 0000000000..5a036db922
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.cpp
@@ -0,0 +1,125 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+#include "TypeUtils.hpp"
+
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+namespace armnn
+{
+
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const boost::optional<TensorInfo>& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout);
+
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+ if (descriptor.m_BiasEnabled)
+ {
+ BOOST_ASSERT(biases.is_initialized());
+
+ aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ aclPadStrideInfo,
+ aclDepthMultiplier);
+}
+
+template<armnn::DataType... dataTypes>
+ClDepthwiseConvolutionBaseWorkload<dataTypes...>::ClDepthwiseConvolutionBaseWorkload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ std::string name = std::string("ClDepthwiseConvolution") +
+ GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload";
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+
+ //Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3);
+ if (use3x3Optimisation)
+ {
+ m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure(
+ &input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier);
+ }
+ else
+ {
+ m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+ &input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier);
+ }
+
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+}
+
+template<armnn::DataType... dataTypes>
+void ClDepthwiseConvolutionBaseWorkload<dataTypes...>::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
+// Generate known implementations for linker
+template class ClDepthwiseConvolutionBaseWorkload<DataType::Float16, DataType::Float32>;
+template class ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
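
The constructor above selects the specialised CLDepthwiseConvolutionLayer3x3 when the kernel is 3x3 and otherwise falls back to the generic layer, holding either one behind an arm_compute::IFunction pointer. The dispatch idea reduced to a standalone sketch with illustrative types, not armnn or ACL API:

    #include <iostream>
    #include <memory>

    // Stand-ins for the two ACL functions; both are driven through a common interface.
    struct IFunction             { virtual ~IFunction() = default; virtual void run() = 0; };
    struct DepthwiseLayer3x3     : IFunction { void run() override { std::cout << "3x3 optimised path\n"; } };
    struct DepthwiseLayerGeneric : IFunction { void run() override { std::cout << "generic path\n"; } };

    // Mirrors the dispatch in the constructor: the specialised implementation is chosen
    // only when the kernel width and height are both 3.
    std::unique_ptr<IFunction> MakeDepthwiseLayer(unsigned int kernelW, unsigned int kernelH)
    {
        if (kernelW == 3 && kernelH == 3)
        {
            return std::make_unique<DepthwiseLayer3x3>();
        }
        return std::make_unique<DepthwiseLayerGeneric>();
    }

    int main()
    {
        MakeDepthwiseLayer(3, 3)->run(); // 3x3 optimised path
        MakeDepthwiseLayer(5, 5)->run(); // generic path
    }
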
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp
new file mode 100644
index 0000000000..9d5cde30b6
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionBaseWorkload.hpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <boost/optional.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const boost::optional<TensorInfo>& biases);
+
+template<armnn::DataType... dataTypes>
+class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>
+{
+public:
+ using TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>::m_Data;
+
+ ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
+protected:
+ std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp
new file mode 100644
index 0000000000..17ecd29307
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDepthwiseConvolutionFloatWorkload.hpp"
+
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClDepthwiseConvolutionBaseWorkload(descriptor, info)
+{
+ InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+
+ if (m_BiasTensor)
+ {
+ InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
+ }
+
+ m_DepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
+}
+
+void ClDepthwiseConvolutionFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute");
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+
+ m_DepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp
new file mode 100644
index 0000000000..4f9d5f332e
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionFloatWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload<DataType::Float16,
+ DataType::Float32>
+{
+public:
+ ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ void Execute() const override;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp
new file mode 100644
index 0000000000..22922e4df6
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.cpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClDepthwiseConvolutionBaseWorkload(descriptor, info)
+{
+ InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
+
+ if (m_BiasTensor)
+ {
+ InitializeArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias);
+ }
+
+ m_DepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
+}
+
+void ClDepthwiseConvolutionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute");
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+
+ m_DepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000000..b9f676de94
--- /dev/null
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ void Execute() const override;
+};
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
new file mode 100644
index 0000000000..a2d8534682
--- /dev/null
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp
@@ -0,0 +1,48 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClDivisionFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput);
+}
+
+
+ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<DivisionQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ // Construct
+ m_ArithmeticDivision.configure(&input0, &input1, &output);
+}
+
+void ClDivisionFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute");
+
+ // Executes the layer.
+ m_ArithmeticDivision.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
new file mode 100644
index 0000000000..1aa7ec69f6
--- /dev/null
+++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
+
+class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
+{
+public:
+    ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ using FloatWorkload<DivisionQueueDescriptor>::FloatWorkload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
new file mode 100644
index 0000000000..0a60fc3b5c
--- /dev/null
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClFloorFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : FloatWorkload<FloorQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void ClFloorFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
new file mode 100644
index 0000000000..513862a4d7
--- /dev/null
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor>
+{
+public:
+ ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLFloor m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
new file mode 100644
index 0000000000..b3a97f35f8
--- /dev/null
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -0,0 +1,96 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClFullyConnectedWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+
+ arm_compute::TensorInfo aclBiases;
+ arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiases = BuildArmComputeTensorInfo(biases);
+ optionalAclBiases = &aclBiases;
+ }
+
+ const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
+
+ return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
+ &aclWeights,
+ optionalAclBiases,
+ &aclOutput,
+ fullyConnectedLayerInfo);
+}
+
+ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
+ , m_FullyConnectedLayer(memoryManager)
+{
+ m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ m_BiasesTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ }
+
+ m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ // Construct
+ arm_compute::FullyConnectedLayerInfo fc_info;
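+    // transpose_weights tells Compute Library whether it must transpose the weight matrix itself;
+    // the transpose/reshape is then performed once during prepare() below.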
+ fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+
+ InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
+
+ if (m_BiasesTensor)
+ {
+ InitializeArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias);
+ }
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_FullyConnectedLayer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClFullyConnectedWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute");
+ m_FullyConnectedLayer.run();
+}
+
+void ClFullyConnectedWorkload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_WeightsTensor);
+ FreeTensorIfUnused(m_BiasesTensor);
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
new file mode 100644
index 0000000000..0c9047235b
--- /dev/null
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+
+arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor);
+
+class ClFullyConnectedWorkload : public armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>
+{
+public:
+ ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor,
+ const armnn::WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+
+ using armnn::BaseWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..edc13bcfea
--- /dev/null
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -0,0 +1,50 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClL2NormalizationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const L2NormalizationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+ arm_compute::NormalizationLayerInfo normalizationInfo =
+ CreateAclNormalizationLayerInfoForL2Normalization(input);
+
+ return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
+ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
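+    // CLNormalizationLayer is set up with the cross-channel (CROSS_MAP) parameters returned by
+    // CreateAclNormalizationLayerInfoForL2Normalization, which makes it behave as an L2 normalisation
+    // along the channel axis; this is why no CLL2Normalize function is used here (see the header note).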
+ m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0]));
+}
+
+void ClL2NormalizationFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..f7b7911f4c
--- /dev/null
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const L2NormalizationDescriptor& descriptor);
+
+class ClL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
+{
+public:
+ ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ // Purposely not a CLL2Normalize function. See constructor.
+ mutable arm_compute::CLNormalizationLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
new file mode 100644
index 0000000000..352698ad1b
--- /dev/null
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -0,0 +1,391 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClLstmFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLLSTMLayer.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
+ : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
+{
+ arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
+
+ // Basic parameters
+ m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo());
+
+ m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo());
+
+ m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo());
+
+ m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo());
+
+ m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo());
+
+ m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo());
+
+ m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo());
+
+ m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo());
+
+ m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo());
+
+    // See the Android NN API LSTM specification for which optional parameters are required in each configuration.
+ if (!m_Data.m_Parameters.m_CifgEnabled)
+ {
+ m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo());
+
+ m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo());
+
+ m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_CellToInputWeights != nullptr)
+ {
+ BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo());
+ }
+
+ m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo());
+
+ lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(),
+ m_RecurrentToInputWeightsTensor.get(),
+ m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr,
+ m_InputGateBiasTensor.get());
+ }
+
+ if (m_Data.m_Parameters.m_ProjectionEnabled)
+ {
+ m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo());
+
+ m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_ProjectionBias != nullptr)
+ {
+ BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo());
+ }
+
+ lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(),
+ m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr);
+ }
+
+ if (m_Data.m_Parameters.m_PeepholeEnabled)
+ {
+ m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());
+
+ m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());
+
+ lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
+ }
+
+ const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ const arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+
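+    // The queue descriptor exposes four outputs: [0] scratch buffer, [1] output state, [2] cell state, [3] output.
+    // The scratch buffer output is not used directly; a local CLTensor is allocated for it further down.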
+ arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
+ arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor();
+
+ // Get the batch_size and the num_units from the cellStateIn dimensions
+ const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2];
+ const unsigned int batch_size = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[0]);
+ const unsigned int num_units = boost::numeric_cast<unsigned int>(inputTensorInfo.GetShape()[1]);
+
+ m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_Parameters.m_CifgEnabled)
+ {
+        // 2D tensor with dimensions [batch_size, num_units * 3] when CIFG is enabled
+        armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 3 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1);
+    }
+    else
+    {
+        // 2D tensor with dimensions [batch_size, num_units * 4] when CIFG is disabled
+        armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 4 }, DataType::Float32);
+        BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2);
+ }
+
+ float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell;
+ float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;
+
+    // Prepare the ActivationLayerInfo; five activation settings need to be handled.
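+    // The values of m_ActivationFunc follow the Android NN LSTM encoding: 0 = none, 1 = ReLU, 3 = ReLU6, 4 = tanh, 6 = sigmoid.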
+ arm_compute::ActivationLayerInfo activationLayerInfo;
+ if (m_Data.m_Parameters.m_ActivationFunc == 0)
+ {
+ // no activation, do nothing
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 1)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 3)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 4)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 6)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+ }
+ else
+ {
+        throw armnn::Exception("Unsupported activation function");
+ }
+
+
+ m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
+ m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+ m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+ m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
+ &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+ &cell_state_out, &output, lstm_param, activationLayerInfo,
+ cell_threshold, projection_threshold);
+
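+    // The scratch buffer only needs backing memory, not data: InitialiseArmComputeTensorEmpty simply allocates the tensor.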
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);
+
+ InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
+ InitializeArmComputeClTensorData(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights);
+ InitializeArmComputeClTensorData(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights);
+ InitializeArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights);
+ InitializeArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights);
+ InitializeArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights);
+ InitializeArmComputeClTensorData(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias);
+ InitializeArmComputeClTensorData(*m_CellBiasTensor, m_Data.m_CellBias);
+ InitializeArmComputeClTensorData(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias);
+
+ if (!m_Data.m_Parameters.m_CifgEnabled)
+ {
+ InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
+ InitializeArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights);
+ if (m_Data.m_CellToInputWeights != nullptr)
+ {
+ InitializeArmComputeClTensorData(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights);
+ }
+ InitializeArmComputeClTensorData(*m_InputGateBiasTensor, m_Data.m_InputGateBias);
+ }
+
+ if (m_Data.m_Parameters.m_ProjectionEnabled)
+ {
+ InitializeArmComputeClTensorData(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights);
+ if (m_Data.m_ProjectionBias != nullptr)
+ {
+ InitializeArmComputeClTensorData(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias);
+ }
+ }
+
+ if (m_Data.m_Parameters.m_PeepholeEnabled)
+ {
+ InitializeArmComputeClTensorData(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights);
+ InitializeArmComputeClTensorData(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights);
+ }
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_LstmLayer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClLstmFloatWorkload::Execute() const
+{
+ m_LstmLayer.run();
+}
+
+arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn,
+ const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+ const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+ const TensorInfo& output, const LstmDescriptor& descriptor,
+ const TensorInfo& inputToForgetWeights,
+ const TensorInfo& inputToCellWeights,
+ const TensorInfo& inputToOutputWeights,
+ const TensorInfo& recurrentToForgetWeights,
+ const TensorInfo& recurrentToCellWeights,
+ const TensorInfo& recurrentToOutputWeights,
+ const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+ const TensorInfo& outputGateBias,
+ const TensorInfo* inputToInputWeights,
+ const TensorInfo* recurrentToInputWeights,
+ const TensorInfo* cellToInputWeights,
+ const TensorInfo* inputGateBias,
+ const TensorInfo* projectionWeights,
+ const TensorInfo* projectionBias,
+ const TensorInfo* cellToForgetWeights,
+ const TensorInfo* cellToOutputWeights)
+{
+ arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
+
+ // The inputs and the outputs
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
+ const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
+ const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
+ const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
+ const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ // Basic parameters
+ const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights);
+ const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights);
+ const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights);
+ const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToForgetWeights);
+ const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToCellWeights);
+ const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToOutputWeights);
+ const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias);
+ const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias);
+ const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias);
+
+ arm_compute::TensorInfo aclInputToInputWeightsInfo;
+ arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
+ arm_compute::TensorInfo aclCellToInputWeightsInfo;
+ arm_compute::TensorInfo aclInputGateBiasInfo;
+ arm_compute::TensorInfo aclProjectionWeightsInfo;
+ arm_compute::TensorInfo aclProjectionBiasInfo;
+ arm_compute::TensorInfo aclCellToForgetWeightsInfo;
+ arm_compute::TensorInfo aclCellToOutputWeightsInfo;
+
+ if (!descriptor.m_CifgEnabled)
+ {
+ armnn::TensorInfo inputToInputWInfo = *inputToInputWeights;
+ aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo);
+ armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights;
+ aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo);
+
+ if (cellToInputWeights != nullptr)
+ {
+ armnn::TensorInfo cellToInputWInfo = *cellToInputWeights;
+ aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo);
+ }
+ armnn::TensorInfo inputGateBiasInfo = *inputGateBias;
+ aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo);
+ lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
+ cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo: nullptr,
+ &aclInputGateBiasInfo);
+ }
+
+ if (descriptor.m_ProjectionEnabled)
+ {
+ const armnn::TensorInfo& projectionWInfo = *projectionWeights;
+ aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo);
+
+ if (projectionBias != nullptr)
+ {
+ const armnn::TensorInfo& projectionBiasInfo = *projectionBias;
+ aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo);
+ }
+ lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
+ projectionBias != nullptr ? &aclProjectionBiasInfo: nullptr);
+ }
+
+ if (descriptor.m_PeepholeEnabled)
+ {
+ const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights;
+ aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo);
+ const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights;
+ aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo);
+ lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
+ }
+
+ float cell_threshold = descriptor.m_ClippingThresCell;
+ float projection_threshold = descriptor.m_ClippingThresProj;
+
+    // Prepare the ActivationLayerInfo; five activation settings need to be handled (same mapping as in the constructor).
+ arm_compute::ActivationLayerInfo activationLayerInfo;
+ if (descriptor.m_ActivationFunc == 0)
+ {
+ // no activation, do nothing
+ }
+ else if (descriptor.m_ActivationFunc == 1)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+ }
+ else if (descriptor.m_ActivationFunc == 3)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+ }
+ else if (descriptor.m_ActivationFunc == 4)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+ }
+ else if (descriptor.m_ActivationFunc == 6)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+ }
+ else
+ {
+        throw armnn::Exception("Unsupported activation function");
+ }
+
+ return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
+ &aclInputToCellWeightsInfo,
+ &aclInputToOutputWeightsInfo,
+ &aclRecurrentToForgetWeightsInfo,
+ &aclRecurrentToCellWeightsInfo,
+ &aclRecurrentToOutputWeightsInfo,
+ &aclForgetGateBiasInfo,
+ &aclCellBiasInfo,
+ &aclOutputGateBiasInfo,
+ &aclOutputStateInInfo, &aclCellStateInInfo,
+ &aclScratchBufferInfo, &aclOutputStateOutInfo,
+ &aclCellStateOutInfo, &aclOutputInfo,
+ lstm_params_info, activationLayerInfo,
+ cell_threshold, projection_threshold);
+}
+
+void ClLstmFloatWorkload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_InputToInputWeightsTensor);
+ FreeTensorIfUnused(m_InputToForgetWeightsTensor);
+ FreeTensorIfUnused(m_InputToCellWeightsTensor);
+ FreeTensorIfUnused(m_InputToOutputWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToInputWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToCellWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor);
+ FreeTensorIfUnused(m_CellToInputWeightsTensor);
+ FreeTensorIfUnused(m_CellToForgetWeightsTensor);
+ FreeTensorIfUnused(m_CellToOutputWeightsTensor);
+ FreeTensorIfUnused(m_InputGateBiasTensor);
+ FreeTensorIfUnused(m_ForgetGateBiasTensor);
+ FreeTensorIfUnused(m_CellBiasTensor);
+ FreeTensorIfUnused(m_OutputGateBiasTensor);
+ FreeTensorIfUnused(m_ProjectionWeightsTensor);
+ FreeTensorIfUnused(m_ProjectionBiasTensor);
+ FreeTensorIfUnused(m_ScratchBuffer);
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
new file mode 100644
index 0000000000..352d774a99
--- /dev/null
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp
@@ -0,0 +1,68 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor>
+{
+public:
+ ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLLSTMLayer m_LstmLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ProjectionWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ProjectionBiasTensor;
+
+ std::unique_ptr<arm_compute::CLTensor> m_ScratchBuffer;
+
+ void FreeUnusedTensors();
+};
+
+arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn,
+ const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+ const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+ const TensorInfo& output, const LstmDescriptor &descriptor,
+ const TensorInfo& inputToForgetWeights,
+ const TensorInfo& inputToCellWeights,
+ const TensorInfo& inputToOutputWeights,
+ const TensorInfo& recurrentToForgetWeights,
+ const TensorInfo& recurrentToCellWeights,
+ const TensorInfo& recurrentToOutputWeights,
+ const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+ const TensorInfo& outputGateBias,
+ const TensorInfo* inputToInputWeights,
+ const TensorInfo* recurrentToInputWeights,
+ const TensorInfo* cellToInputWeights,
+ const TensorInfo* inputGateBias,
+ const TensorInfo* projectionWeights,
+ const TensorInfo* projectionBias,
+ const TensorInfo* cellToForgetWeights,
+ const TensorInfo* cellToOutputWeights);
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.cpp b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp
new file mode 100644
index 0000000000..151f1e0ee7
--- /dev/null
+++ b/src/backends/cl/workloads/ClMergerFloatWorkload.cpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMergerFloatWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClMergerFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute");
+ ClBaseMergerWorkload::Execute();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClMergerFloatWorkload.hpp b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp
new file mode 100644
index 0000000000..9782f7a8f3
--- /dev/null
+++ b/src/backends/cl/workloads/ClMergerFloatWorkload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class ClMergerFloatWorkload : public ClBaseMergerWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+ using ClBaseMergerWorkload<DataType::Float16, DataType::Float32>::ClBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.cpp b/src/backends/cl/workloads/ClMergerUint8Workload.cpp
new file mode 100644
index 0000000000..9d1060d857
--- /dev/null
+++ b/src/backends/cl/workloads/ClMergerUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMergerUint8Workload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClMergerUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute");
+ ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMergerUint8Workload.hpp b/src/backends/cl/workloads/ClMergerUint8Workload.hpp
new file mode 100644
index 0000000000..cbfc19a0f2
--- /dev/null
+++ b/src/backends/cl/workloads/ClMergerUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp
new file mode 100644
index 0000000000..d53e149129
--- /dev/null
+++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.cpp
@@ -0,0 +1,60 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClMultiplicationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+ // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+ // ignored for F32 tensors.
+ return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
+ &aclInput2,
+ &aclOutput,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+
+ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ // Construct
+ m_PixelWiseMultiplication.configure(&input0,
+ &input1,
+ &output,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+}
+
+void ClMultiplicationFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute");
+
+ // Executes the layer.
+ m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp
new file mode 100644
index 0000000000..a793ac64df
--- /dev/null
+++ b/src/backends/cl/workloads/ClMultiplicationFloatWorkload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
+
+class ClMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor>
+{
+public:
+ ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ using FloatWorkload<MultiplicationQueueDescriptor>::FloatWorkload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..969c9bb08b
--- /dev/null
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -0,0 +1,51 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClNormalizationFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include "ClWorkloadUtils.hpp"
+
+using namespace armnn::armcomputetensorutils;
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const NormalizationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
+
+ arm_compute::NormalizationLayerInfo layerInfo = BuildArmComputeNormalizationLayerInfo(descriptor);
+
+ return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
+
+ m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+void ClNormalizationFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute");
+ m_NormalizationLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..f30be91aaa
--- /dev/null
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const NormalizationDescriptor& descriptor);
+
+class ClNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor>
+{
+public:
+ ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLNormalizationLayer m_NormalizationLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
new file mode 100644
index 0000000000..45dc5e8be7
--- /dev/null
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPadWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <arm_compute/core/Types.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+template <armnn::DataType... T>
+ClPadWorkload<T...>::ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
+: TypedWorkload<PadQueueDescriptor, T...>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
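+    // arm_compute::PaddingList is a list of (before, after) padding pairs, one per dimension,
+    // so armnn's m_PadList can be cast across directly (assuming matching element types).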
+ arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_Parameters.m_PadList);
+
+ m_Layer.configure(&input, &output, padList);
+}
+
+template <armnn::DataType... T>
+void ClPadWorkload<T...>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute");
+ m_Layer.run();
+}
+
+bool ClPadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const PadDescriptor& descriptor,
+ std::string* reasonIfUnsupported)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ arm_compute::PaddingList padList = static_cast<arm_compute::PaddingList>(descriptor.m_PadList);
+
+ const arm_compute::Status aclStatus = arm_compute::CLPadLayer::validate(&aclInputInfo,
+ &aclOutputInfo,
+ padList);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return supported;
+}
+
+} // namespace armnn
+
+template class armnn::ClPadWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClPadWorkload<armnn::DataType::QuantisedAsymm8>;
diff --git a/src/backends/cl/workloads/ClPadWorkload.hpp b/src/backends/cl/workloads/ClPadWorkload.hpp
new file mode 100644
index 0000000000..a7ad6670a7
--- /dev/null
+++ b/src/backends/cl/workloads/ClPadWorkload.hpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/WorkloadData.hpp>
+#include <backends/Workload.hpp>
+#include <arm_compute/runtime/CL/functions/CLPadLayer.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClPadWorkload : public TypedWorkload<PadQueueDescriptor, dataTypes...>
+{
+public:
+ ClPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLPadLayer m_Layer;
+};
+
+bool ClPadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const PadDescriptor& descriptor,
+ std::string* reasonIfUnsupported);
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
new file mode 100644
index 0000000000..079772dbaf
--- /dev/null
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPermuteWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/core/Error.h>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor)
+{
+ const armnn::PermutationVector& perm = descriptor.m_DimMappings;
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U })
+ && !perm.IsEqual({ 0U, 2U, 3U, 1U })
+ && !perm.IsEqual({ 3U, 2U, 0U, 1U }),
+ "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported");
+
+ return arm_compute::Status{};
+}
+
+template <armnn::DataType... DataTypes>
+ClPermuteWorkload<DataTypes...>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info)
+{
+ using armcomputetensorutils::BuildArmComputePermutationVector;
+
+ m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+ const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Configure the layer.
+ m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType... DataTypes>
+void ClPermuteWorkload<DataTypes...>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute");
+ m_PermuteFunction.run();
+}
+
+template class ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.hpp b/src/backends/cl/workloads/ClPermuteWorkload.hpp
new file mode 100644
index 0000000000..8ff5707ad6
--- /dev/null
+++ b/src/backends/cl/workloads/ClPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+#include <backends/WorkloadData.hpp>
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
+
+template<armnn::DataType... DataTypes>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
+{
+public:
+ static const std::string& GetName()
+ {
+ static const std::string name = std::string("ClPermuteWorkload");
+ return name;
+ }
+
+ ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data;
+ mutable arm_compute::CLPermute m_PermuteFunction;
+};
+
+using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>;
+using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..98911856fe
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPooling2dBaseWorkload.hpp"
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+ return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+template <armnn::DataType... dataTypes>
+ClPooling2dBaseWorkload<dataTypes...>::ClPooling2dBaseWorkload(
+ const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+ : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+    // Configure the layer.
+ m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>;
+template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000000..8f9db08ddc
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dBaseWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types.
+template <armnn::DataType... dataTypes>
+class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>
+{
+public:
+ using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data;
+
+ ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ const std::string& name);
+
+protected:
+ mutable arm_compute::CLPoolingLayer m_PoolingLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp
new file mode 100644
index 0000000000..dc9d17f0ae
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPooling2dFloatWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>(descriptor, info, "ClPooling2dFloatWorkload")
+{
+}
+
+void ClPooling2dFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp
new file mode 100644
index 0000000000..ba9294c40f
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dFloatWorkload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include "ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+ ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..0b4b15f806
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClPooling2dUint8Workload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload")
+{
+}
+
+void ClPooling2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..b07f955343
--- /dev/null
+++ b/src/backends/cl/workloads/ClPooling2dUint8Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include "ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+};
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp
new file mode 100644
index 0000000000..4da3bbd703
--- /dev/null
+++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClReshapeFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void ClReshapeFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp
new file mode 100644
index 0000000000..e5fc20ec8b
--- /dev/null
+++ b/src/backends/cl/workloads/ClReshapeFloatWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor>
+{
+public:
+ ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.cpp b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..8fbee151fc
--- /dev/null
+++ b/src/backends/cl/workloads/ClReshapeUint8Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClReshapeUint8Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input, &output);
+}
+
+void ClReshapeUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute");
+
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClReshapeUint8Workload.hpp b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..654437a4c1
--- /dev/null
+++ b/src/backends/cl/workloads/ClReshapeUint8Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+// Reshape
+class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+    ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp
new file mode 100644
index 0000000000..499466e959
--- /dev/null
+++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.cpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClResizeBilinearFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/cl/ClLayerSupport.hpp>
+#include <backends/aclCommon/ArmComputeUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<ResizeBilinearQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR,
+ arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f),
+ arm_compute::SamplingPolicy::TOP_LEFT);
+}
+
+void ClResizeBilinearFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute");
+ m_ResizeBilinearLayer.run();
+}
+
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp
new file mode 100644
index 0000000000..f29f416907
--- /dev/null
+++ b/src/backends/cl/workloads/ClResizeBilinearFloatWorkload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+class ClResizeBilinearFloatWorkload : public FloatWorkload<ResizeBilinearQueueDescriptor>
+{
+public:
+ ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLScale m_ResizeBilinearLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp
new file mode 100644
index 0000000000..eb05a19670
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxBaseWorkload.hpp"
+
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include <arm_compute/runtime/CL/functions/CLSoftmaxLayer.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output)
+{
+    // NOTE: We report 4D (and higher) Softmax as unsupported until full support is added to ACL.
+    if (input.GetShape().GetNumDimensions() >= 4u)
+ {
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported");
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo);
+}
+
+} // namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp
new file mode 100644
index 0000000000..b800056cdf
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxBaseWorkload.hpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/Tensor.hpp>
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output);
+
+} // namespace armnn
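As the NOTE in ClSoftmaxBaseWorkload.cpp states, this helper rejects tensors with four or more dimensions. A hedged sketch of what a caller would observe for a 4D input; the shape and function name are made up:

// Hypothetical check against a 4D tensor; per the .cpp above this is expected to fail validation.
#include "ClSoftmaxBaseWorkload.hpp"

bool FourDimSoftmaxRejectedSketch()
{
    const armnn::TensorInfo fourDimInfo({ 1, 2, 2, 3 }, armnn::DataType::Float32);

    const arm_compute::Status status = armnn::ClSoftmaxWorkloadValidate(fourDimInfo, fourDimInfo);

    // status.error_description() should read "4d softmax is not supported".
    return status.error_code() != arm_compute::ErrorCode::OK;
}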
diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp
new file mode 100644
index 0000000000..606005659f
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxFloatWorkload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
+{
+ m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute");
+ m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp
new file mode 100644
index 0000000000..b400b3c7ea
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include <arm_compute/runtime/MemoryManagerOnDemand.h>
+
+#include <memory>
+
+namespace armnn
+{
+
+class ClSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor>
+{
+public:
+ ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..7e0589e89f
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxUint8Workload.hpp"
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ , m_SoftmaxLayer(memoryManager)
+{
+ m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const auto outputQuantization = output.info()->quantization_info();
+
+ if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+ {
+ throw InvalidArgumentException(
+ "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+ }
+
+ m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute");
+
+ m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
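The constructor above throws unless the output tensor is quantised with scale 1.0f / 256.0f and offset 0, so a network feeding this workload has to describe its Softmax output accordingly. A small sketch of such a TensorInfo; the shape is hypothetical:

// Hypothetical output TensorInfo satisfying the quantization check in ClSoftmaxUint8Workload's constructor.
#include <armnn/Tensor.hpp>

const armnn::TensorInfo softmaxOutputInfo({ 1, 10 },                        // made-up shape
                                          armnn::DataType::QuantisedAsymm8,
                                          1.0f / 256.0f,                    // required quantization scale
                                          0);                               // required quantization offset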
diff --git a/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..4786faf60b
--- /dev/null
+++ b/src/backends/cl/workloads/ClSoftmaxUint8Workload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+// Softmax
+class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+ ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp
new file mode 100644
index 0000000000..5fd634bdb6
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterFloatWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterFloatWorkload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute");
+ ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp
new file mode 100644
index 0000000000..a0b5846f8e
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class ClSplitterFloatWorkload : public ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+ using ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>::ClBaseSplitterWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.cpp b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..50a251ada7
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterUint8Workload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute");
+ ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/cl/workloads/ClSplitterUint8Workload.hpp b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..19e8be5034
--- /dev/null
+++ b/src/backends/cl/workloads/ClSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload;
+ virtual void Execute() const override;
+};
+} //namespace armnn
+
+
+
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
new file mode 100644
index 0000000000..37b334d94e
--- /dev/null
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSubtractionWorkload.hpp"
+
+#include <backends/cl/ClTensorHandle.hpp>
+#include <backends/CpuTensorHandle.hpp>
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... T>
+ClSubtractionWorkload<T...>::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<SubtractionQueueDescriptor, T...>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType... T>
+void ClSubtractionWorkload<T...>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+ m_Layer.run();
+}
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
+ &aclInput1Info,
+ &aclOutputInfo,
+ g_AclConvertPolicy);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return supported;
+}
+
+} //namespace armnn
+
+template class armnn::ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>;
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
new file mode 100644
index 0000000000..67b219b09d
--- /dev/null
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/Workload.hpp>
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, dataTypes...>
+{
+public:
+ ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLArithmeticSubtraction m_Layer;
+};
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+} //namespace armnn
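Unlike the Status-returning validate helpers elsewhere in this change, ClSubtractionValidate folds the ACL error description into a bool-plus-reason interface. A self-contained sketch of a caller; the function name, shapes and data types are illustrative only, and broadcasting support is ultimately decided by CLArithmeticSubtraction::validate:

// Hypothetical caller of ClSubtractionValidate declared above.
#include "ClSubtractionWorkload.hpp"

#include <string>

bool CheckSubtractionSupportSketch(std::string& reason)
{
    const armnn::TensorInfo input0Info({ 2, 3 }, armnn::DataType::Float32);
    const armnn::TensorInfo input1Info({ 2, 3 }, armnn::DataType::Float32);
    const armnn::TensorInfo outputInfo({ 2, 3 }, armnn::DataType::Float32);

    // On failure the ACL error description is copied into 'reason'.
    return armnn::ClSubtractionValidate(input0Info, input1Info, outputInfo, &reason);
}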
diff --git a/src/backends/cl/workloads/ClWorkloadUtils.hpp b/src/backends/cl/workloads/ClWorkloadUtils.hpp
new file mode 100644
index 0000000000..3a8ff00bb6
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloadUtils.hpp
@@ -0,0 +1,63 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "OpenClTimer.hpp"
+#include <backends/aclCommon/ArmComputeTensorUtils.hpp>
+#include <backends/CpuTensorHandle.hpp>
+
+#include <Half.hpp>
+
+#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \
+ ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
+ name, \
+ armnn::OpenClTimer(), \
+ armnn::WallClockTimer())
+
+namespace armnn
+{
+
+template <typename T>
+void CopyArmComputeClTensorData(arm_compute::CLTensor& dstTensor, const T* srcData)
+{
+ {
+ ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
+ dstTensor.map(true);
+ }
+
+ {
+ ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
+ armcomputetensorutils::CopyArmComputeITensorData<T>(srcData, dstTensor);
+ }
+
+ dstTensor.unmap();
+}
+
+inline void InitializeArmComputeClTensorData(arm_compute::CLTensor& clTensor,
+ const ConstCpuTensorHandle* handle)
+{
+ BOOST_ASSERT(handle);
+
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
+ switch(handle->GetTensorInfo().GetDataType())
+ {
+ case DataType::Float16:
+ CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<armnn::Half>());
+ break;
+ case DataType::Float32:
+ CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
+ break;
+ case DataType::QuantisedAsymm8:
+ CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<uint8_t>());
+ break;
+ case DataType::Signed32:
+ CopyArmComputeClTensorData(clTensor, handle->GetConstTensor<int32_t>());
+ break;
+ default:
+ BOOST_ASSERT_MSG(false, "Unexpected tensor type.");
+ }
+}
+
+} //namespace armnn
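InitializeArmComputeClTensorData expects the CL tensor to already carry the right tensor info; a typical constant-weights setup in a workload constructor builds the ACL tensor from the handle's TensorInfo first and then calls the helper. This is a sketch only, assuming BuildArmComputeTensor from ArmComputeTensorUtils.hpp and a hypothetical weightHandle (for example m_Data.m_Weight):

// Sketch of a constant-weights upload; 'weightsTensor' and 'weightHandle' are hypothetical names.
#include "ClWorkloadUtils.hpp"

void UploadConstWeightsSketch(arm_compute::CLTensor& weightsTensor,
                              const armnn::ConstCpuTensorHandle* weightHandle)
{
    // Describe the CL tensor from the CPU-side TensorInfo; InitializeArmComputeClTensorData
    // then allocates it (via InitialiseArmComputeTensorEmpty) and copies the data across.
    armnn::armcomputetensorutils::BuildArmComputeTensor(weightsTensor, weightHandle->GetTensorInfo());
    armnn::InitializeArmComputeClTensorData(weightsTensor, weightHandle);
}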
diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp
new file mode 100644
index 0000000000..3329f42e08
--- /dev/null
+++ b/src/backends/cl/workloads/ClWorkloads.hpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+#include "ClActivationFloatWorkload.hpp"
+#include "ClActivationUint8Workload.hpp"
+#include "ClAdditionWorkload.hpp"
+#include "ClBaseConstantWorkload.hpp"
+#include "ClBaseMergerWorkload.hpp"
+#include "ClBatchNormalizationFloatWorkload.hpp"
+#include "ClConstantFloatWorkload.hpp"
+#include "ClConstantUint8Workload.hpp"
+#include "ClConvolution2dFloatWorkload.hpp"
+#include "ClConvolution2dUint8Workload.hpp"
+#include "ClDepthwiseConvolutionFloatWorkload.hpp"
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+#include "ClDivisionFloatWorkload.hpp"
+#include "ClFloorFloatWorkload.hpp"
+#include "ClFullyConnectedWorkload.hpp"
+#include "ClL2NormalizationFloatWorkload.hpp"
+#include "ClLstmFloatWorkload.hpp"
+#include "ClMergerFloatWorkload.hpp"
+#include "ClMergerUint8Workload.hpp"
+#include "ClMultiplicationFloatWorkload.hpp"
+#include "ClNormalizationFloatWorkload.hpp"
+#include "ClPermuteWorkload.hpp"
+#include "ClPadWorkload.hpp"
+#include "ClPooling2dFloatWorkload.hpp"
+#include "ClPooling2dUint8Workload.hpp"
+#include "ClReshapeFloatWorkload.hpp"
+#include "ClReshapeUint8Workload.hpp"
+#include "ClResizeBilinearFloatWorkload.hpp"
+#include "ClSoftmaxFloatWorkload.hpp"
+#include "ClSoftmaxUint8Workload.hpp"
+#include "ClSplitterFloatWorkload.hpp"
+#include "ClSplitterUint8Workload.hpp"
+#include "ClSubtractionWorkload.hpp"
+#include "ClConvertFp16ToFp32Workload.hpp"
+#include "ClConvertFp32ToFp16Workload.hpp"