Diffstat (limited to 'src/armnn/backends/ClWorkloads')
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp  33
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp  24
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp  47
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp  27
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp  57
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp  30
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp  54
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp  28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp  26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp  26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp  42
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp  34
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp  16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp  20
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp  16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp  20
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp  70
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp  26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp  72
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp  28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp  30
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp  37
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp  91
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp  32
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp  35
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp  29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp  28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp  52
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp  29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp  35
-rw-r--r--  src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp  29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp  19
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp  22
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp  18
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp  21
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp  39
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp  27
-rw-r--r--  src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp  49
-rw-r--r--  src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp  28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp  54
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp  42
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp  47
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp  31
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp  24
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp  21
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp  25
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp  24
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp  31
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp  26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp  29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp  27
-rw-r--r--  src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp  36
-rw-r--r--  src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp  23
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp  29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp  26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp  39
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp  28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp  17
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp  20
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp  17
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp  21
61 files changed, 1983 insertions(+), 0 deletions(-)
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
new file mode 100644
index 0000000000..fb5d78425e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClActivationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+ClActivationFloat32Workload::ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClActivationFloat32Workload", 1, 1);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_ActivationLayer.configure(&input, &output, activationLayerInfo);
+}
+
+void ClActivationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationFloat32Workload_Execute");
+ m_ActivationLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
new file mode 100644
index 0000000000..9bab4202be
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Activation layer execution
+class ClActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+{
+public:
+ ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
\ No newline at end of file
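
Note: the split above (configure the ACL function once in the constructor, run it on every Execute()) is the pattern all workloads in this patch follow, and it explains the mutable member: Execute() is const, while an arm_compute function's run() is not. A minimal standalone sketch of the idea, using illustrative stand-in types rather than the real arm_compute classes:

#include <iostream>

// Stand-in for an ACL function: configure() once, run() many times.
struct AclFunctionLike
{
    void configure(const char* what) { std::cout << "configure " << what << '\n'; }
    void run() { std::cout << "run\n"; } // non-const, like arm_compute::IFunction::run()
};

class WorkloadLike
{
public:
    WorkloadLike() { m_Function.configure("activation"); } // one-time setup, as in the ctor above
    void Execute() const { m_Function.run(); }             // per-inference work

private:
    mutable AclFunctionLike m_Function; // mutable so the const Execute() can call run()
};

int main()
{
    WorkloadLike workload;
    workload.Execute();
    workload.Execute();
}
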
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
new file mode 100644
index 0000000000..3671dd7187
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClActivationUint8Workload.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+namespace armnn
+{
+
+ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
+{
+
+ std::string reasonIfUnsupported;
+ if (!IsClActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters))
+ {
+ throw InvalidArgumentException(reasonIfUnsupported);
+ }
+
+ // Only BoundedReLu is supported (see IsClActivationUint8Supported)
+ arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ m_Data.m_Parameters.m_A,
+ m_Data.m_Parameters.m_B);
+
+ m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ActivationLayer.configure(&input, &output, layerInfo);
+}
+
+void ClActivationUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationUint8Workload_Execute");
+
+ m_ActivationLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
new file mode 100644
index 0000000000..3a9cceb298
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Activation layer execution
+class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
+{
+public:
+ ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLActivationLayer m_ActivationLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
new file mode 100644
index 0000000000..153167f172
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClAdditionFloat32Workload.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClAdditionFloat32Workload::ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<AdditionQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClAdditionFloat32Workload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input0, &input1, &output, ms_AclConvertPolicy);
+}
+
+void ClAdditionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClAdditionFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+bool ClAdditionFloat32Workload::IsSupported(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = decltype(m_Layer)::validate(&aclInput0Info,
+ &aclInput1Info,
+ &aclOutputInfo,
+ ms_AclConvertPolicy);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return supported;
+}
+
+} //namespace armnn
\ No newline at end of file
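
Note: IsSupported above is the recurring bridge between ACL's validate() API and Arm NN's bool-plus-reason convention. A self-contained sketch of that conversion using mock types (the real arm_compute::Status exposes the same two accessors used here):

#include <string>

enum class ErrorCode { OK, RUNTIME_ERROR };

// Mock of the two arm_compute::Status accessors the workload relies on.
struct Status
{
    ErrorCode   error_code() const        { return m_Code; }
    std::string error_description() const { return m_Desc; }

    ErrorCode   m_Code = ErrorCode::OK;
    std::string m_Desc;
};

// Turn a validate() result into a bool plus an optional human-readable reason.
bool ToIsSupported(const Status& status, std::string* reasonIfUnsupported)
{
    const bool supported = (status.error_code() == ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        *reasonIfUnsupported = status.error_description();
    }
    return supported;
}
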
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
new file mode 100644
index 0000000000..37e50c2c86
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+{
+public:
+ ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+ static bool IsSupported(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+
+private:
+ mutable arm_compute::CLArithmeticAddition m_Layer;
+ static constexpr arm_compute::ConvertPolicy ms_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
new file mode 100644
index 0000000000..4b72d92d72
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClBaseConstantWorkload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+template class ClBaseConstantWorkload<DataType::Float32>;
+template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>;
+
+template<armnn::DataType dataType>
+void ClBaseConstantWorkload<dataType>::Execute() const
+{
+ // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
+ // on the first inference, then reused for subsequent inferences.
+ // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not
+ // have been configured at the time.
+ if (!m_RanOnce)
+ {
+ const ConstantQueueDescriptor& data = this->m_Data;
+
+ BOOST_ASSERT(data.m_LayerOutput != nullptr);
+ arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+
+ switch (dataType)
+ {
+ case DataType::Float32:
+ {
+ CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<float>(), output);
+ break;
+ }
+ case DataType::QuantisedAsymm8:
+ {
+ CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output);
+ break;
+ }
+ default:
+ {
+ BOOST_ASSERT_MSG(false, "Unknown data type");
+ break;
+ }
+ }
+
+ m_RanOnce = true;
+ }
+}
+
+
+} //namespace armnn
\ No newline at end of file
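
Note: the m_RanOnce guard implements one-shot initialisation inside a const Execute(), which is why the header below declares the flag mutable. A standalone sketch of the pattern (illustrative types only):

#include <iostream>

class OneShotWorkload
{
public:
    void Execute() const
    {
        if (!m_RanOnce) // first inference: push the constant data to the device tensor
        {
            std::cout << "initialise output tensor\n";
            m_RanOnce = true;
        }
        // later inferences: the tensor already holds the data, so nothing to do
    }

private:
    mutable bool m_RanOnce = false; // mutable: written from the const Execute()
};

int main()
{
    OneShotWorkload workload;
    workload.Execute(); // initialises
    workload.Execute(); // no-op
}
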
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
new file mode 100644
index 0000000000..660842f375
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+template <armnn::DataType DataType>
+class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
+{
+public:
+ ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info)
+ , m_RanOnce(false)
+ {
+ }
+
+ void Execute() const override;
+
+private:
+ mutable bool m_RanOnce;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
new file mode 100644
index 0000000000..7542c62b47
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Merger layer common to all data types
+template <armnn::DataType DataType>
+class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType>
+{
+public:
+ using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload;
+
+ void Execute() const override
+ {
+ // With subtensors, merger is a no-op
+ }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
new file mode 100644
index 0000000000..fef841ced2
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Base class template providing an implementation of the Splitter layer common to all data types
+template <armnn::DataType DataType>
+class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType>
+{
+public:
+ using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload;
+
+ void Execute() const override
+ {
+        // With subtensors, splitter is a no-op
+ }
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..dabd495d59
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClBatchNormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClBatchNormalizationFloat32Workload::ClBatchNormalizationFloat32Workload(
+ const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info)
+{
+ BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo());
+ BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo());
+ BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+ BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+ m_Data.ValidateInputsOutputs("ClBatchNormalizationFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps);
+
+ InitialiseArmComputeClTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>());
+}
+
+void ClBatchNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClBatchNormalizationFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..ddbd0f05c0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+{
+public:
+ ClBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLBatchNormalizationLayer m_Layer;
+
+ arm_compute::CLTensor m_Mean;
+ arm_compute::CLTensor m_Variance;
+ arm_compute::CLTensor m_Gamma;
+ arm_compute::CLTensor m_Beta;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
new file mode 100644
index 0000000000..99880d68a7
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConstantFloat32Workload.hpp"
+namespace armnn
+{
+
+void ClConstantFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantFloat32Workload_Execute");
+ ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
new file mode 100644
index 0000000000..5f86d3b2b6
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+class ClConstantFloat32Workload : public ClBaseConstantWorkload<DataType::Float32>
+{
+public:
+ using ClBaseConstantWorkload<DataType::Float32>::ClBaseConstantWorkload;
+ void Execute() const override;
+};
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
new file mode 100644
index 0000000000..078d4261fa
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConstantUint8Workload.hpp"
+namespace armnn
+{
+
+void ClConstantUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantUint8Workload_Execute");
+ ClBaseConstantWorkload::Execute();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp
new file mode 100644
index 0000000000..3a53f1011e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseConstantWorkload.hpp"
+
+namespace armnn
+{
+
+class ClConstantUint8Workload : public ClBaseConstantWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseConstantWorkload<DataType::QuantisedAsymm8>::ClBaseConstantWorkload;
+ void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
new file mode 100644
index 0000000000..6f4069bcc0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
@@ -0,0 +1,70 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvolution2dFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info)
+{
+
+ // todo: check tensor shapes match
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::CLTensor* optionalBias = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBias = &m_BiasTensor;
+ }
+
+ m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_pConvolutionLayer = std::make_unique<arm_compute::CLConvolutionLayer>();
+ static_cast<arm_compute::CLConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+
+ BOOST_ASSERT(m_pConvolutionLayer);
+
+ InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+ if (optionalBias)
+ {
+ InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>());
+ }
+}
+
+void ClConvolution2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dFloat32Workload_Execute");
+ BOOST_ASSERT(m_pConvolutionLayer);
+
+ m_pConvolutionLayer->run();
+}
+
+} //namespace armnn
\ No newline at end of file
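
Note: DimensionRoundingType::FLOOR above fixes how the output spatial size follows from the pad and stride parameters. A small arithmetic sketch of the standard convolution output-size formula under FLOOR rounding (a general formula, not code from this patch):

#include <cassert>

// out = floor((in + padBefore + padAfter - kernel) / stride) + 1
unsigned int ConvOutputSize(unsigned int in, unsigned int kernel, unsigned int stride,
                            unsigned int padBefore, unsigned int padAfter)
{
    return (in + padBefore + padAfter - kernel) / stride + 1; // unsigned division floors
}

int main()
{
    // e.g. a 224-wide input with a 3x3 kernel, stride 2 and pad 1 on each side gives 112
    assert(ConvOutputSize(224, 3, 2, 1, 1) == 112);
}
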
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
new file mode 100644
index 0000000000..29931056a8
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+class ClConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
+{
+public:
+ ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer;
+
+ arm_compute::CLTensor m_KernelTensor;
+ arm_compute::CLTensor m_BiasTensor;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
new file mode 100644
index 0000000000..a3c6ac9dca
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
@@ -0,0 +1,72 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvolution2dUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info)
+{
+
+ // todo: check tensor shapes match
+ const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
+ BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ arm_compute::CLTensor* optionalBias = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBias = &m_BiasTensor;
+ }
+
+ m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ BOOST_ASSERT_MSG(IsClDirectConvolution2dSupported(weightInfo, m_Data.m_Parameters),
+ "Unsupported parameters for u8 convolution");
+
+ m_pConvolutionLayer = std::make_unique<arm_compute::CLDirectConvolutionLayer>();
+ static_cast<arm_compute::CLDirectConvolutionLayer*>(m_pConvolutionLayer.get())->configure(&input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ BOOST_ASSERT(m_pConvolutionLayer);
+
+ InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+
+ if (optionalBias)
+ {
+ InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>());
+ }
+}
+
+void ClConvolution2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dUint8Workload_Execute");
+ BOOST_ASSERT(m_pConvolutionLayer);
+
+ m_pConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
new file mode 100644
index 0000000000..b2849d773b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+
+namespace armnn
+{
+
+class ClConvolution2dUint8Workload : public Uint8Workload<Convolution2dQueueDescriptor>
+{
+public:
+ ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable std::unique_ptr<arm_compute::IFunction> m_pConvolutionLayer;
+
+ arm_compute::CLTensor m_KernelTensor;
+ arm_compute::CLTensor m_BiasTensor;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
new file mode 100644
index 0000000000..f31c73bc60
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDepthwiseConvolutionFloat32Workload.hpp"
+#include "ClDepthwiseConvolutionHelper.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClDepthwiseConvolutionFloat32Workload::ClDepthwiseConvolutionFloat32Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+ InitClDepthwiseConvolutionWorkload(*this);
+}
+
+void ClDepthwiseConvolutionFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionFloat32Workload_Execute");
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
new file mode 100644
index 0000000000..8711f0c515
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ ClDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ typedef float KernelDataType;
+ typedef float BiasDataType;
+
+ mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+ arm_compute::CLTensor m_KernelTensor;
+ arm_compute::CLTensor m_BiasTensor;
+
+ template <typename WorkloadType>
+ friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
new file mode 100644
index 0000000000..cd7115773d
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
@@ -0,0 +1,91 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <armnn/TypesUtils.hpp>
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+
+template <typename WorkloadType>
+void InitClDepthwiseConvolutionWorkload(WorkloadType& workload)
+{
+ using T = typename WorkloadType::KernelDataType;
+ using B = typename WorkloadType::BiasDataType;
+
+ auto& m_Data = workload.GetData();
+ auto& m_KernelTensor = workload.m_KernelTensor;
+ auto& m_BiasTensor = workload.m_BiasTensor;
+ auto& m_pDepthwiseConvolutionLayer = workload.m_pDepthwiseConvolutionLayer;
+
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ std::string reasonIfUnsupported;
+ if (!IsClDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
+ {
+ throw UnimplementedException(reasonIfUnsupported);
+ }
+
+ armcomputetensorutils::BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ arm_compute::CLTensor* optionalBias = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ armcomputetensorutils::BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBias = &m_BiasTensor;
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ std::string name = std::string("ClDepthwiseConvolution") + GetDataTypeName(GetDataType<T>()) + "Workload";
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ //Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3);
+ if (use3x3Optimisation)
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_pDepthwiseConvolutionLayer.get())->configure(
+ &input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+ else
+ {
+ m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_pDepthwiseConvolutionLayer.get())->configure(
+ &input,
+ &m_KernelTensor,
+ optionalBias,
+ &output,
+ padStrideInfo);
+ }
+
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<T>());
+
+ if (optionalBias)
+ {
+ InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->template GetConstTensor<B>());
+ }
+}
+
+} //namespace armnn
\ No newline at end of file
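
Note: InitClDepthwiseConvolutionWorkload reaches into the private members of both the Float32 and Uint8 workloads; that compiles because each workload class (see the headers in this patch) declares the function template a friend. A minimal sketch of the sharing pattern, with hypothetical names:

// One free function template configures the private state of every
// workload type that befriends it.
template <typename WorkloadType>
void InitWorkload(WorkloadType& workload)
{
    workload.m_Configured = true; // allowed: InitWorkload is a friend of the class
}

class Float32WorkloadLike
{
    template <typename T>
    friend void InitWorkload(T&);

    bool m_Configured = false;
};

int main()
{
    Float32WorkloadLike workload;
    InitWorkload(workload);
}
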
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
new file mode 100644
index 0000000000..7e7c488c74
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDepthwiseConvolutionUint8Workload.hpp"
+#include "ClDepthwiseConvolutionHelper.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+
+ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+{
+ InitClDepthwiseConvolutionWorkload(*this);
+}
+
+void ClDepthwiseConvolutionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionUint8Workload_Execute");
+ BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+
+ m_pDepthwiseConvolutionLayer->run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
new file mode 100644
index 0000000000..ee09ff3e58
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+{
+public:
+ ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ typedef uint8_t KernelDataType;
+ typedef int32_t BiasDataType;
+
+ mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
+
+ arm_compute::CLTensor m_KernelTensor;
+ arm_compute::CLTensor m_BiasTensor;
+
+ template <typename WorkloadType>
+ friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
new file mode 100644
index 0000000000..882da50855
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClFloorFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClFloorFloat32Workload::ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClFloorFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void ClFloorFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFloorFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
new file mode 100644
index 0000000000..532dd29884
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+{
+public:
+ ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLFloor m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
new file mode 100644
index 0000000000..96596b9d9c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
@@ -0,0 +1,52 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClFullyConnectedFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClFullyConnectedFloat32Workload::ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+{
+
+ BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+
+ arm_compute::CLTensor* optionalBiasTensor = nullptr;
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+ optionalBiasTensor = &m_BiasesTensor;
+ }
+
+ m_Data.ValidateInputsOutputs("ClFullyConnectedFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    // Configure the fully connected layer
+ m_FullyConnected.configure(
+ &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix);
+
+ // Allocate
+ InitialiseArmComputeClTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>());
+
+ if (optionalBiasTensor)
+ {
+ InitialiseArmComputeClTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>());
+ }
+}
+
+void ClFullyConnectedFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFullyConnectedFloat32Workload_Execute");
+ m_FullyConnected.run();
+}
+
+} //namespace armnn
\ No newline at end of file
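
Note: m_TransposeWeightMatrix is forwarded straight to ACL and indicates whether the stored weight matrix must be transposed before the underlying GEMM. As a refresher on the arithmetic a fully connected layer performs, a tiny standalone sketch (plain C++, nothing Arm-specific):

#include <array>
#include <cstddef>

// y = W * x for a 2x3 weight matrix and a 3-element input vector.
std::array<float, 2> FullyConnected(const std::array<std::array<float, 3>, 2>& w,
                                    const std::array<float, 3>& x)
{
    std::array<float, 2> y{};
    for (std::size_t row = 0; row < 2; ++row)
    {
        for (std::size_t col = 0; col < 3; ++col)
        {
            y[row] += w[row][col] * x[col];
        }
    }
    return y;
}

int main()
{
    const std::array<std::array<float, 3>, 2> w{{{1, 0, 1}, {0, 1, 0}}};
    const std::array<float, 3> x{2, 3, 4};
    return FullyConnected(w, x)[0] == 6.0f ? 0 : 1; // 1*2 + 0*3 + 1*4 = 6
}
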
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
new file mode 100644
index 0000000000..def20e0831
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+
+namespace armnn
+{
+
+class ClFullyConnectedFloat32Workload : public armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>
+{
+public:
+ ClFullyConnectedFloat32Workload(const armnn::FullyConnectedQueueDescriptor& descriptor,
+ const armnn::WorkloadInfo& info);
+
+ using armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>::m_Data;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLFullyConnectedLayer m_FullyConnected;
+ arm_compute::CLTensor m_WeightsTensor;
+ arm_compute::CLTensor m_BiasesTensor;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..e15db74ec9
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
@@ -0,0 +1,35 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClL2NormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClL2NormalizationFloat32Workload::ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClL2NormalizationFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0]));
+}
+
+void ClL2NormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClL2NormalizationFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..848803e2f0
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+{
+public:
+ ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ // Purposely not a CLL2Normalize function. See constructor.
+ mutable arm_compute::CLNormalizationLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
new file mode 100644
index 0000000000..4d2d708a0e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMergerFloat32Workload.hpp"
+
+
+namespace armnn
+{
+
+void ClMergerFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerFloat32Workload_Execute");
+ ClBaseMergerWorkload::Execute();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
new file mode 100644
index 0000000000..9808d30ccf
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class ClMergerFloat32Workload : public ClBaseMergerWorkload<armnn::DataType::Float32>
+{
+public:
+ using ClBaseMergerWorkload<armnn::DataType::Float32>::ClBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
new file mode 100644
index 0000000000..94a1d3c593
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMergerUint8Workload.hpp"
+
+
+namespace armnn
+{
+
+void ClMergerUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerUint8Workload_Execute");
+ ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp
new file mode 100644
index 0000000000..1ddbb2ac52
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class ClMergerUint8Workload : public ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseMergerWorkload<armnn::DataType::QuantisedAsymm8>::ClBaseMergerWorkload;
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
new file mode 100644
index 0000000000..405d109aa1
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClMultiplicationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClMultiplicationFloat32Workload::ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClMultiplicationFloat32Workload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    // Configure the pixel-wise multiplication
+ m_PixelWiseMultiplication.configure(&input0,
+ &input1,
+ &output,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN);
+}
+
+void ClMultiplicationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMultiplicationFloat32Workload_Execute");
+
+ // Execute the layer
+ m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
new file mode 100644
index 0000000000..8e387118e8
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+class ClMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+{
+public:
+ ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ using Float32Workload<MultiplicationQueueDescriptor>::Float32Workload;
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
new file mode 100644
index 0000000000..a163ec2883
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClNormalizationFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
+ const NormalizationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ arm_compute::NormalizationLayerInfo layerInfo =
+ armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor);
+
+ return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+ClNormalizationFloat32Workload::ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClNormalizationFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ arm_compute::NormalizationLayerInfo normalizationInfo =
+ armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
+
+ m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+void ClNormalizationFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClNormalizationFloat32Workload_Execute");
+ m_NormalizationLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
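
Note: ClNormalizationWorkloadValidate is one of the free validate functions introduced by this patch: descriptor and shape checks run through ACL before any workload is built. A hedged usage sketch of a call site (the fragment below is hypothetical and the variable names are illustrative):

// Hypothetical call site (fragment): validate with ACL first, construct only on success.
arm_compute::Status status = armnn::ClNormalizationWorkloadValidate(inputInfo, outputInfo, parameters);
if (status.error_code() == arm_compute::ErrorCode::OK)
{
    armnn::ClNormalizationFloat32Workload workload(queueDescriptor, workloadInfo);
    workload.Execute();
}
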
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
new file mode 100644
index 0000000000..cbd5fa92a9
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const NormalizationDescriptor& descriptor);
+
+class ClNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+{
+public:
+ ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLNormalizationLayer m_NormalizationLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
new file mode 100644
index 0000000000..3147e95b2e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPermuteWorkload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor)
+{
+ const armnn::PermutationVector& perm = descriptor.m_DimMappings;
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U })
+ && !perm.IsEqual({ 0U, 2U, 3U, 1U })
+ && !perm.IsEqual({ 3U, 2U, 0U, 1U }),
+ "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported");
+
+ return arm_compute::Status{};
+}
+
+template <armnn::DataType DataType>
+ClPermuteWorkload<DataType>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+{
+ using armcomputetensorutils::BuildArmComputePermutationVector;
+
+ m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+ const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Configure the permute function
+ m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType DataType>
+void ClPermuteWorkload<DataType>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, GetName() + "_Execute");
+ m_PermuteFunction.run();
+}
+
+template class ClPermuteWorkload<DataType::Float32>;
+template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
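The three mappings accepted by ClPermuteWorkloadValidate are the NHWC/NCHW-style reorderings that CLPermute can service. Assuming ArmNN's convention that m_DimMappings[i] names the destination position of source dimension i (so dstShape[mappings[i]] = srcShape[i]), the effect on a 4-D shape can be sketched standalone; the helper below is illustrative only, not patch code:

    #include <array>
    #include <cstddef>

    std::array<unsigned int, 4> Permute4dShape(const std::array<unsigned int, 4>& srcShape,
                                               const std::array<unsigned int, 4>& mappings)
    {
        std::array<unsigned int, 4> dstShape{};
        for (std::size_t i = 0; i < 4; ++i)
        {
            dstShape[mappings[i]] = srcShape[i]; // source dim i lands at index mappings[i]
        }
        return dstShape;
    }

    // Example: srcShape = {N, H, W, C} with mappings {0, 2, 3, 1}
    // yields dstShape = {N, C, H, W}, i.e. NHWC -> NCHW.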
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
new file mode 100644
index 0000000000..430c59524e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
@@ -0,0 +1,42 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/CL/functions/CLPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+
+arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
+
+template <armnn::DataType DataType>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+ static const std::string& GetName()
+ {
+ static const std::string name = std::string("ClPermute") + GetDataTypeName(DataType) + "Workload";
+ return name;
+ }
+
+ ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ mutable arm_compute::CLPermute m_PermuteFunction;
+};
+
+using ClPermuteFloat32Workload = ClPermuteWorkload<DataType::Float32>;
+using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
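These aliases pair with the explicit instantiations in the .cpp: only Float32 and QuantisedAsymm8 permute workloads exist, and GetName() stamps each with a distinct profiling label. A sketch of how a factory might dispatch on data type using the aliases (MakeClPermuteWorkload is a hypothetical helper, not part of this patch):

    #include "backends/ClWorkloads/ClPermuteWorkload.hpp"

    #include <memory>

    std::unique_ptr<armnn::IWorkload> MakeClPermuteWorkload(
        const armnn::PermuteQueueDescriptor& descriptor,
        const armnn::WorkloadInfo& info,
        armnn::DataType type)
    {
        using namespace armnn;
        switch (type)
        {
            case DataType::Float32:
                return std::make_unique<ClPermuteFloat32Workload>(descriptor, info);
            case DataType::QuantisedAsymm8:
                return std::make_unique<ClPermuteUint8Workload>(descriptor, info);
            default:
                return nullptr; // no CL permute workload for other data types
        }
    }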
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..dbdc06f174
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dBaseWorkload.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+ return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+template <armnn::DataType dataType>
+ClPooling2dBaseWorkload<dataType>::ClPooling2dBaseWorkload(
+ const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+ : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+    // Configure the pooling layer
+ m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class ClPooling2dBaseWorkload<DataType::Float32>;
+template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
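Note the idiom at the end of this file: the constructor is defined out of line, so the two explicit template class instantiations are what make the Float32 and QuantisedAsymm8 variants available to other translation units; any other data type would compile against the header but fail to link. Reduced to a standalone illustration (names are invented for the example):

    // counter.hpp -- declaration only; the member is defined out of line
    template <int N>
    struct Counter
    {
        int Value() const;
    };

    // counter.cpp -- definition plus the only instantiations that will link
    template <int N>
    int Counter<N>::Value() const { return N; }

    template struct Counter<8>;   // Counter<8>::Value() links
    template struct Counter<16>;  // Counter<16>::Value() links
                                  // Counter<32>::Value() would be an undefined reference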
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000000..828f000505
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types
+template <armnn::DataType dataType>
+class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType>
+{
+public:
+ using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data;
+
+ ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ const std::string& name);
+
+protected:
+ mutable arm_compute::CLPoolingLayer m_PoolingLayer;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
new file mode 100644
index 0000000000..a7f5855b8a
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dFloat32Workload.hpp"
+
+namespace armnn
+{
+
+ClPooling2dFloat32Workload::ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClPooling2dBaseWorkload<DataType::Float32>(descriptor, info, "ClPooling2dFloat32Workload")
+{
+}
+
+void ClPooling2dFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dFloat32Workload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
new file mode 100644
index 0000000000..3456a2cff8
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+#include "backends/ClWorkloads//ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+class ClPooling2dFloat32Workload : public ClPooling2dBaseWorkload<DataType::Float32>
+{
+public:
+ ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..2d2109e252
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClPooling2dUint8Workload.hpp"
+
+namespace armnn
+{
+
+ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "ClPooling2dUint8Workload")
+{
+}
+
+void ClPooling2dUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dUint8Workload_Execute");
+ m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..0875c7486c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
@@ -0,0 +1,24 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+#include "backends/ClWorkloads/ClPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
new file mode 100644
index 0000000000..7b4ad4415b
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClReshapeFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClReshapeFloat32Workload::ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClReshapeFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output);
+}
+
+void ClReshapeFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeFloat32Workload_Execute");
+ m_Layer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
new file mode 100644
index 0000000000..e344ee08ad
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+{
+public:
+ ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..36cc1dec17
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClReshapeUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input, &output);
+}
+
+void ClReshapeUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeUint8Workload_Execute");
+
+ m_Layer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..9e4199098c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+// Reshape
+class ClReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+    ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
new file mode 100644
index 0000000000..d71011a2e3
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClResizeBilinearFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+ClResizeBilinearFloat32Workload::ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32Workload<ResizeBilinearQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClResizeBilinearFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR,
+ arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f),
+ arm_compute::SamplingPolicy::TOP_LEFT);
+}
+
+void ClResizeBilinearFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClResizeBilinearFloat32Workload_Execute");
+ m_ResizeBilinearLayer.run();
+}
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
new file mode 100644
index 0000000000..5f70e71619
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
@@ -0,0 +1,23 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor>
+{
+public:
+ ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLScale m_ResizeBilinearLayer;
+};
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
new file mode 100644
index 0000000000..257e76a4df
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSoftmaxFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClSoftmaxFloat32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+ m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxFloat32Workload_Execute");
+ m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
new file mode 100644
index 0000000000..a26bbe851d
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+{
+public:
+ ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..9e856fea94
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -0,0 +1,39 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSoftmaxUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info)
+ : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const auto outputQuantization = output.info()->quantization_info();
+
+ if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+ {
+ throw InvalidArgumentException(
+ "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+ }
+
+ m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxUint8Workload_Execute");
+
+ m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
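The quantization check in the constructor above is not arbitrary: softmax outputs lie in [0, 1), and with 8-bit asymmetric quantization real = scale * (quantized - offset), so scale = 1/256 with offset = 0 spends the whole 8-bit range on exactly that interval. A small illustrative helper (not patch code) makes the arithmetic concrete:

    #include <cstdint>

    float DequantizeUint8(uint8_t quantized, float scale, int32_t offset)
    {
        return scale * (static_cast<float>(quantized) - static_cast<float>(offset));
    }

    // With scale = 1.0f / 256.0f and offset = 0:
    //   DequantizeUint8(0, scale, 0)   == 0.0f
    //   DequantizeUint8(255, scale, 0) == 255.0f / 256.0f
    // so the representable outputs cover [0, 255/256], matching what softmax can produce.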
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..07ee6256d8
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+// Softmax
+class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+ ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
new file mode 100644
index 0000000000..6221d56766
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSplitterFloat32Workload.hpp"
+
+namespace armnn
+{
+
+void ClSplitterFloat32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterFloat32Workload_Execute");
+ ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
new file mode 100644
index 0000000000..cfc7eaa3c2
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class ClSplitterFloat32Workload : public ClBaseSplitterWorkload<DataType::Float32>
+{
+public:
+ using ClBaseSplitterWorkload<DataType::Float32>::ClBaseSplitterWorkload;
+    void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..3aa470894c
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSplitterUint8Workload.hpp"
+
+namespace armnn
+{
+
+void ClSplitterUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterUint8Workload_Execute");
+ ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..ed8b3cc69e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using ClBaseSplitterWorkload<DataType::QuantisedAsymm8>::ClBaseSplitterWorkload;
+    void Execute() const override;
+};
+} //namespace armnn
+
+
+