Diffstat (limited to 'src/armnn/backends/ClWorkloads')
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp | 25
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp | 7
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp | 14
-rw-r--r--  src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp | 71
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.hpp | 29
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp | 41
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp | 18
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp | 18
-rw-r--r--  src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.hpp | 20
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp | 20
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp | 6
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp | 10
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp | 10
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp | 74
-rw-r--r--  src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp | 22
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp | 64
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp | 28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp | 64
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp | 28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp | 36
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp | 10
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp | 33
-rw-r--r--  src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp | 8
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp | 122
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp | 37
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp | 22
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp | 17
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp | 91
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp | 22
-rw-r--r--  src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp | 16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp | 70
-rw-r--r--  src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp | 19
-rw-r--r--  src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp | 16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp | 5
-rw-r--r--  src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp | 405
-rw-r--r--  src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.hpp | 67
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp | 26
-rw-r--r--  src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp | 9
-rw-r--r--  src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp | 16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp | 13
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp | 10
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp | 8
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp | 28
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp | 16
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp | 2
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp | 4
-rw-r--r--  src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp | 2
69 files changed, 1411 insertions(+), 348 deletions(-)
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
index fb5d78425e..f072549cbc 100644
--- a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.cpp
@@ -9,10 +9,31 @@
namespace armnn
{
+arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
+
+ if (input.GetDataType() == DataType::QuantisedAsymm8 &&
+ activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "CL: Logistic Activations unsupported with QAsymm8 data type."};
+ }
+
+ return arm_compute::CLActivationLayer::validate(&aclInput,
+ &aclOutput,
+ activationLayerInfo);
+}
ClActivationFloat32Workload::ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<ActivationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<ActivationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClActivationFloat32Workload", 1, 1);
@@ -26,7 +47,7 @@ ClActivationFloat32Workload::ClActivationFloat32Workload(const ActivationQueueDe
void ClActivationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloat32Workload_Execute");
m_ActivationLayer.run();
}
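
For context, a minimal caller-side sketch (not part of this change) of how a layer-support check could wrap the new ClActivationWorkloadValidate function; the wrapper name is hypothetical:

    #include <string>

    // Converts the arm_compute::Status into the bool + reason-string shape
    // used elsewhere in this backend (compare ClAdditionValidate below).
    bool IsClActivationSupportedSketch(const armnn::TensorInfo& input,
                                       const armnn::TensorInfo& output,
                                       const armnn::ActivationDescriptor& descriptor,
                                       std::string* reasonIfUnsupported)
    {
        const arm_compute::Status status =
            armnn::ClActivationWorkloadValidate(input, output, descriptor);
        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported)
        {
            *reasonIfUnsupported = status.error_description();
        }
        return supported;
    }
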
diff --git a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
index 9bab4202be..9fbfe95856 100644
--- a/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClActivationFloat32Workload.hpp
@@ -9,9 +9,12 @@
namespace armnn
{
+arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor);
-// Activation layer execution
-class ClActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+// Activation layer execution.
+class ClActivationFloat32Workload : public FloatWorkload<ActivationQueueDescriptor>
{
public:
ClActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
index 3671dd7187..75ab3d0691 100644
--- a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.cpp
@@ -6,6 +6,7 @@
#include "ClActivationUint8Workload.hpp"
#include "backends/ClLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
#include "backends/ClTensorHandle.hpp"
#include "backends/CpuTensorHandle.hpp"
namespace armnn
@@ -15,15 +16,8 @@ ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescri
const WorkloadInfo& info)
: Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
{
-
- std::string reasonIfUnsupported;
- if (!IsClActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters))
- {
- throw InvalidArgumentException(reasonIfUnsupported);
- }
-
- // Only BoundedReLu is supported (see IsClActivationUint8Supported)
- arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
+ arm_compute::ActivationLayerInfo layerInfo(activation,
m_Data.m_Parameters.m_A,
m_Data.m_Parameters.m_B);
@@ -37,7 +31,7 @@ ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescri
void ClActivationUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClActivationUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute");
m_ActivationLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
index 3a9cceb298..449b2d56c5 100644
--- a/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClActivationUint8Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-// Activation layer execution
+// Activation layer execution.
class ClActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor>
{
public:
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp
new file mode 100644
index 0000000000..5dd7bb323a
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.cpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClAdditionBaseWorkload.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... T>
+ClAdditionBaseWorkload<T...>::ClAdditionBaseWorkload(const AdditionQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<AdditionQueueDescriptor, T...>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClAdditionBaseWorkload", 2, 1);
+
+ arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+ m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType... T>
+void ClAdditionBaseWorkload<T...>::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionBaseWorkload_Execute");
+ m_Layer.run();
+}
+
+bool ClAdditionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ if (input0.GetDataType() == DataType::QuantisedAsymm8)
+ {
+ // Reject quantised addition for the moment (COMPMID-1385)
+ if (reasonIfUnsupported) { *reasonIfUnsupported = "Quantised Addition not yet supported"; }
+ return false;
+ }
+
+ const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info,
+ &aclInput1Info,
+ &aclOutputInfo,
+ g_AclConvertPolicy);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return supported;
+}
+
+} //namespace armnn
+
+template class armnn::ClAdditionBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClAdditionBaseWorkload<armnn::DataType::QuantisedAsymm8>;
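
The two explicit instantiations above are needed because the template's member functions are defined in this .cpp file rather than the header. A generic illustration of the pattern (hypothetical Foo, not ArmNN code):

    #include <initializer_list>

    // foo.hpp: declaration only; the definitions live in foo.cpp.
    template <int... Ns>
    struct Foo
    {
        int Sum() const;
    };

    // foo.cpp: definition plus explicit instantiations for the linker.
    template <int... Ns>
    int Foo<Ns...>::Sum() const
    {
        int total = 0;
        for (int n : { Ns... })
        {
            total += n;
        }
        return total;
    }

    template struct Foo<1, 2>;  // every pack used by other translation units must appear here
    template struct Foo<3>;
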
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.hpp
new file mode 100644
index 0000000000..fba8a0d457
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionBaseWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClAdditionBaseWorkload : public TypedWorkload<AdditionQueueDescriptor, dataTypes...>
+{
+public:
+ ClAdditionBaseWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLArithmeticAddition m_Layer;
+};
+
+bool ClAdditionValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
index 153167f172..b69593f5f5 100644
--- a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.cpp
@@ -13,45 +13,10 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClAdditionFloat32Workload::ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor,
- const WorkloadInfo& info)
- : Float32Workload<AdditionQueueDescriptor>(descriptor, info)
-{
- m_Data.ValidateInputsOutputs("ClAdditionFloat32Workload", 2, 1);
-
- arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
- arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
- arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input0, &input1, &output, ms_AclConvertPolicy);
-}
-
void ClAdditionFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClAdditionFloat32Workload_Execute");
- m_Layer.run();
-}
-
-bool ClAdditionFloat32Workload::IsSupported(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output,
- std::string* reasonIfUnsupported)
-{
- const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
- const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
- const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
-
- const arm_compute::Status aclStatus = decltype(m_Layer)::validate(&aclInput0Info,
- &aclInput1Info,
- &aclOutputInfo,
- ms_AclConvertPolicy);
-
- const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
- if (!supported && reasonIfUnsupported)
- {
- *reasonIfUnsupported = aclStatus.error_description();
- }
-
- return supported;
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionFloat32Workload_Execute");
+ ClAdditionBaseWorkload::Execute();
}
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
index 37e50c2c86..7eac485cfe 100644
--- a/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClAdditionFloat32Workload.hpp
@@ -5,26 +5,16 @@
#pragma once
-#include "backends/ClWorkloadUtils.hpp"
+#include "ClAdditionBaseWorkload.hpp"
namespace armnn
{
-class ClAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+class ClAdditionFloat32Workload : public ClAdditionBaseWorkload<DataType::Float16, DataType::Float32>
{
public:
- ClAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
-
+ using ClAdditionBaseWorkload<DataType::Float16, DataType::Float32>::ClAdditionBaseWorkload;
void Execute() const override;
-
- static bool IsSupported(const TensorInfo& input0,
- const TensorInfo& input1,
- const TensorInfo& output,
- std::string* reasonIfUnsupported);
-
-private:
- mutable arm_compute::CLArithmeticAddition m_Layer;
- static constexpr arm_compute::ConvertPolicy ms_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
};
-} //namespace armnn
\ No newline at end of file
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp
new file mode 100644
index 0000000000..a72ceca471
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.cpp
@@ -0,0 +1,18 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClAdditionUint8Workload.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+void ClAdditionUint8Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionUint8Workload_Execute");
+ ClAdditionBaseWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.hpp
new file mode 100644
index 0000000000..73ff287e7e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClAdditionUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "ClAdditionBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class ClAdditionUint8Workload : public ClAdditionBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+ using ClAdditionBaseWorkload<DataType::QuantisedAsymm8>::ClAdditionBaseWorkload;
+ void Execute() const override;
+};
+
+} //namespace armnn
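
Both derived addition workloads rely on constructor inheritance, so only Execute() needs a body. A self-contained sketch of that C++11 pattern (hypothetical types):

    struct BaseWorkloadSketch
    {
        explicit BaseWorkloadSketch(int descriptor) : m_Descriptor(descriptor) {}
        virtual ~BaseWorkloadSketch() = default;
        virtual void Execute() const {}
        int m_Descriptor;
    };

    struct Uint8WorkloadSketch : BaseWorkloadSketch
    {
        using BaseWorkloadSketch::BaseWorkloadSketch;  // inherits the (int) constructor
        void Execute() const override {}               // only the override is written out
    };
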
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
index 4b72d92d72..e0bc365053 100644
--- a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.cpp
@@ -4,17 +4,19 @@
//
#include "ClBaseConstantWorkload.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
#include "backends/ClTensorHandle.hpp"
#include "backends/CpuTensorHandle.hpp"
+#include "Half.hpp"
namespace armnn
{
-template class ClBaseConstantWorkload<DataType::Float32>;
+template class ClBaseConstantWorkload<DataType::Float16, DataType::Float32>;
template class ClBaseConstantWorkload<DataType::QuantisedAsymm8>;
-template<armnn::DataType dataType>
-void ClBaseConstantWorkload<dataType>::Execute() const
+template<armnn::DataType... dataTypes>
+void ClBaseConstantWorkload<dataTypes...>::Execute() const
{
// The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
// on the first inference, then reused for subsequent inferences.
@@ -26,15 +28,21 @@ void ClBaseConstantWorkload<dataType>::Execute() const
BOOST_ASSERT(data.m_LayerOutput != nullptr);
arm_compute::CLTensor& output = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetTensor();
+ arm_compute::DataType computeDataType = static_cast<ClTensorHandle*>(data.m_Outputs[0])->GetDataType();
- switch (dataType)
+ switch (computeDataType)
{
- case DataType::Float32:
+ case arm_compute::DataType::F16:
+ {
+ CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<Half>(), output);
+ break;
+ }
+ case arm_compute::DataType::F32:
{
CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<float>(), output);
break;
}
- case DataType::QuantisedAsymm8:
+ case arm_compute::DataType::QASYMM8:
{
CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output);
break;
diff --git a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
index 660842f375..7ad7bb93ca 100644
--- a/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClBaseConstantWorkload.hpp
@@ -9,12 +9,12 @@
namespace armnn
{
-template <armnn::DataType DataType>
-class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataType>
+template <armnn::DataType... DataTypes>
+class ClBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataTypes...>
{
public:
ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
- : TypedWorkload<ConstantQueueDescriptor, DataType>(descriptor, info)
+ : TypedWorkload<ConstantQueueDescriptor, DataTypes...>(descriptor, info)
, m_RanOnce(false)
{
}
diff --git a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
index 7542c62b47..531e32961b 100644
--- a/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClBaseMergerWorkload.hpp
@@ -10,16 +10,16 @@
namespace armnn
{
-// Base class template providing an implementation of the Merger layer common to all data types
-template <armnn::DataType DataType>
-class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType>
+// Base class template providing an implementation of the Merger layer common to all data types.
+template <armnn::DataType... DataTypes>
+class ClBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...>
{
public:
- using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload;
+ using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload;
void Execute() const override
{
- // With subtensors, merger is a no-op
+ // With subtensors, merger is a no-op.
}
};
diff --git a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
index fef841ced2..8e4f10f9fd 100644
--- a/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
@@ -10,16 +10,16 @@
namespace armnn
{
-// Base class template providing an implementation of the Splitter layer common to all data types
-template <armnn::DataType DataType>
-class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType>
+// Base class template providing an implementation of the Splitter layer common to all data types.
+template <armnn::DataType... DataTypes>
+class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...>
{
public:
- using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload;
+ using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload;
void Execute() const override
{
- // With subtensors, merger is a no-op
+ // With subtensors, splitter is a no-op.
}
};
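
The recurring change from a single armnn::DataType template parameter to a parameter pack lets one workload class serve several tensor types. A simplified, standalone sketch of the idea (not ArmNN's real TypedWorkload):

    enum class DataType { Float16, Float32, QuantisedAsymm8 };

    // A workload templated on DataType... can check at construction time that
    // the tensor's runtime data type is one of the permitted ones.
    template <DataType... Permitted>
    bool IsPermittedDataType(DataType actual)
    {
        const DataType permitted[] = { Permitted... };
        for (DataType d : permitted)
        {
            if (d == actual)
            {
                return true;
            }
        }
        return false;
    }

    // IsPermittedDataType<DataType::Float16, DataType::Float32>(DataType::Float32) -> true
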
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
index dabd495d59..1849c5d411 100644
--- a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.cpp
@@ -7,36 +7,88 @@
#include "backends/ClTensorHandle.hpp"
#include "backends/CpuTensorHandle.hpp"
#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
namespace armnn
{
using namespace armcomputetensorutils;
+arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor &desc)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean);
+ const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var);
+ const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta);
+ const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma);
+
+ return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo,
+ &aclOutputInfo,
+ &aclMeanInfo,
+ &aclVarInfo,
+ &aclBetaInfo,
+ &aclGammaInfo,
+ desc.m_Eps);
+}
+
ClBatchNormalizationFloat32Workload::ClBatchNormalizationFloat32Workload(
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
- BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo());
- BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo());
- BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo());
- BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo());
+ m_Mean = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
+
+ m_Variance = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo());
+
+ m_Gamma = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo());
+
+ m_Beta = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
m_Data.ValidateInputsOutputs("ClBatchNormalizationFloat32Workload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(&input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps);
- InitialiseArmComputeClTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>());
- InitialiseArmComputeClTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>());
- InitialiseArmComputeClTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>());
- InitialiseArmComputeClTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>());
+ m_Layer.configure(&input,
+ &output,
+ m_Mean.get(),
+ m_Variance.get(),
+ m_Beta.get(),
+ m_Gamma.get(),
+ m_Data.m_Parameters.m_Eps);
+
+ InitializeArmComputeClTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean);
+ InitializeArmComputeClTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance);
+ InitializeArmComputeClTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta);
+ InitializeArmComputeClTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma);
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // the input tensors that are no longer needed can be freed
+ m_Layer.prepare();
+ FreeUnusedTensors();
}
void ClBatchNormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClBatchNormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloat32Workload_Execute");
m_Layer.run();
}
+void ClBatchNormalizationFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_Mean);
+ FreeTensorIfUnused(m_Variance);
+ FreeTensorIfUnused(m_Gamma);
+ FreeTensorIfUnused(m_Beta);
+}
+
} //namespace armnn
\ No newline at end of file
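
FreeUnusedTensors depends on a small FreeTensorIfUnused helper. A sketch of its likely shape (assuming ACL tensors expose is_used(); the real utility lives in the backend headers and may differ):

    #include <memory>

    template <typename Tensor>
    void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
    {
        // After prepare(), Compute Library has copied/reshaped the data into
        // its own buffers and marks the staging tensor unused, so the CL
        // memory backing it can be released early.
        if (tensor && !tensor->is_used())
        {
            tensor.reset();
        }
    }
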
diff --git a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
index ddbd0f05c0..a45614a284 100644
--- a/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClBatchNormalizationFloat32Workload.hpp
@@ -10,21 +10,31 @@
namespace armnn
{
-class ClBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor& desc);
+
+class ClBatchNormalizationFloat32Workload : public FloatWorkload<BatchNormalizationQueueDescriptor>
{
public:
ClBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
- using Float32Workload<BatchNormalizationQueueDescriptor>::Float32Workload;
+ using FloatWorkload<BatchNormalizationQueueDescriptor>::FloatWorkload;
void Execute() const override;
private:
mutable arm_compute::CLBatchNormalizationLayer m_Layer;
- arm_compute::CLTensor m_Mean;
- arm_compute::CLTensor m_Variance;
- arm_compute::CLTensor m_Gamma;
- arm_compute::CLTensor m_Beta;
+ std::unique_ptr<arm_compute::CLTensor> m_Mean;
+ std::unique_ptr<arm_compute::CLTensor> m_Variance;
+ std::unique_ptr<arm_compute::CLTensor> m_Gamma;
+ std::unique_ptr<arm_compute::CLTensor> m_Beta;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
index 99880d68a7..58594999a8 100644
--- a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.cpp
@@ -9,7 +9,7 @@ namespace armnn
void ClConstantFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloat32Workload_Execute");
ClBaseConstantWorkload::Execute();
}
diff --git a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
index 5f86d3b2b6..11c3fda8db 100644
--- a/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClConstantFloat32Workload.hpp
@@ -9,10 +9,10 @@
namespace armnn
{
-class ClConstantFloat32Workload : public ClBaseConstantWorkload<DataType::Float32>
+class ClConstantFloat32Workload : public ClBaseConstantWorkload<DataType::Float16, DataType::Float32>
{
public:
- using ClBaseConstantWorkload<DataType::Float32>::ClBaseConstantWorkload;
+ using ClBaseConstantWorkload<DataType::Float16, DataType::Float32>::ClBaseConstantWorkload;
void Execute() const override;
};
diff --git a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
index 078d4261fa..82ce436557 100644
--- a/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConstantUint8Workload.cpp
@@ -9,7 +9,7 @@ namespace armnn
void ClConstantUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConstantUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute");
ClBaseConstantWorkload::Execute();
}
diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..4914be78bc
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,64 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvertFp16ToFp32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
+ const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp16ToFp32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+ m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ if (input.GetDataType() != DataType::Float16)
+ {
+ if (reasonIfUnsupported) { *reasonIfUnsupported = "Input should be Float16"; }
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float16");
+ }
+ if (output.GetDataType() != DataType::Float32)
+ {
+ if (reasonIfUnsupported) { *reasonIfUnsupported = "Output should be Float32"; }
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float32");
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+ &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return aclStatus;
+}
+
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..36ccbb7144
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+
+ ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..19e064351f
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,64 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClConvertFp32ToFp16Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
+ const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) :
+ Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+
+ m_Layer.configure(&input, &output, g_AclConvertPolicy, 0);
+}
+
+void ClConvertFp32ToFp16Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+ m_Layer.run();
+}
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported)
+{
+ if (input.GetDataType() != DataType::Float32)
+ {
+ if (reasonIfUnsupported) { *reasonIfUnsupported = "Input should be Float32"; }
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Input should be Float32");
+ }
+ if (output.GetDataType() != DataType::Float16)
+ {
+ if (reasonIfUnsupported) { *reasonIfUnsupported = "Output should be Float16"; }
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "Output should be Float16");
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate(
+ &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0);
+
+ const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+ if (!supported && reasonIfUnsupported)
+ {
+ *reasonIfUnsupported = aclStatus.error_description();
+ }
+
+ return aclStatus;
+}
+
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..02a442dabc
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+
+ ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ mutable arm_compute::CLDepthConvertLayer m_Layer;
+};
+
+arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ std::string* reasonIfUnsupported);
+
+} //namespace armnn
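
Both conversion workloads pass ConvertPolicy::SATURATE to CLDepthConvertLayer. A standalone illustration (plain C++, not ACL code; whether ACL applies exactly this clamping rule to float narrowing is an assumption here) of what saturation protects against when narrowing FP32 to FP16:

    #include <algorithm>
    #include <iostream>

    float SaturateToFp16Range(float value)
    {
        constexpr float fp16Max = 65504.0f;  // largest finite IEEE 754 half value
        return std::min(std::max(value, -fp16Max), fp16Max);
    }

    int main()
    {
        std::cout << SaturateToFp16Range(1.0e6f) << "\n";  // prints 65504, not inf
    }
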
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
index d7aef3d223..9ac31df5c1 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.cpp
@@ -15,13 +15,15 @@ using namespace armcomputetensorutils;
ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<Convolution2dQueueDescriptor>(descriptor, info)
+ : FloatWorkload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
{
- // todo: check tensor shapes match
+ // todo: check tensor shapes match.
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
- BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
m_Data.m_Parameters.m_StrideY,
@@ -31,11 +33,10 @@ ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution
m_Data.m_Parameters.m_PadBottom,
arm_compute::DimensionRoundingType::FLOOR);
- arm_compute::CLTensor* optionalBias = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBias = &m_BiasTensor;
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
}
m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1);
@@ -44,24 +45,35 @@ ClConvolution2dFloat32Workload::ClConvolution2dFloat32Workload(const Convolution
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
m_ConvolutionLayer.configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
- InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
+ InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight);
- if (optionalBias)
+ if (m_BiasTensor)
{
- InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>());
+ InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias);
}
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // the input tensors that are no longer needed can be freed
+ m_ConvolutionLayer.prepare();
+ FreeUnusedTensors();
}
void ClConvolution2dFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute");
m_ConvolutionLayer.run();
}
+void ClConvolution2dFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
index 4cf73c89cc..51c21aec32 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dFloat32Workload.hpp
@@ -14,7 +14,7 @@
namespace armnn
{
-class ClConvolution2dFloat32Workload : public Float32Workload<Convolution2dQueueDescriptor>
+class ClConvolution2dFloat32Workload : public FloatWorkload<Convolution2dQueueDescriptor>
{
public:
ClConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
@@ -22,10 +22,12 @@ public:
void Execute() const override;
private:
- mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
- arm_compute::CLTensor m_KernelTensor;
- arm_compute::CLTensor m_BiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
index cf419e752e..a78d7fb4a2 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
@@ -18,10 +18,11 @@ ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQu
: Uint8Workload<Convolution2dQueueDescriptor>(descriptor, info)
, m_ConvolutionLayer(memoryManager)
{
-
// todo: check tensor shapes match
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
- BuildArmComputeTensor(m_KernelTensor, weightInfo);
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
m_Data.m_Parameters.m_StrideY,
@@ -31,11 +32,10 @@ ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQu
m_Data.m_Parameters.m_PadBottom,
arm_compute::DimensionRoundingType::FLOOR);
- arm_compute::CLTensor* optionalBias = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBias = &m_BiasTensor;
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
}
m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1);
@@ -44,25 +44,36 @@ ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQu
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
m_ConvolutionLayer.configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
- InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+ InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
- if (optionalBias)
+ if (m_BiasTensor)
{
- InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>());
+ InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>());
}
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // the input tensors that are no longer needed can be freed
+ m_ConvolutionLayer.prepare();
+ FreeUnusedTensors();
}
void ClConvolution2dUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClConvolution2dUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute");
m_ConvolutionLayer.run();
}
+void ClConvolution2dUint8Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
index d4d3908c80..7d9eb76ba1 100644
--- a/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
@@ -22,10 +22,12 @@ public:
void Execute() const override;
private:
- mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
+ mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
- arm_compute::CLTensor m_KernelTensor;
- arm_compute::CLTensor m_BiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp
new file mode 100644
index 0000000000..cfb8485039
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp
@@ -0,0 +1,122 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
+#include "TypeUtils.hpp"
+
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights);
+
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiasesInfo = BuildArmComputeTensorInfo(biases);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ aclPadStrideInfo,
+ aclDepthMultiplier);
+}
+
+template<armnn::DataType... dataTypes>
+ClDepthwiseConvolutionBaseWorkload<dataTypes...>::ClDepthwiseConvolutionBaseWorkload(
+ const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+ auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
+
+ m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
+
+ if (m_Data.m_Parameters.m_BiasEnabled)
+ {
+ m_BiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
+ }
+
+ arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
+ m_Data.m_Parameters.m_StrideY,
+ m_Data.m_Parameters.m_PadLeft,
+ m_Data.m_Parameters.m_PadRight,
+ m_Data.m_Parameters.m_PadTop,
+ m_Data.m_Parameters.m_PadBottom,
+ arm_compute::DimensionRoundingType::FLOOR);
+
+ std::string name = std::string("ClDepthwiseConvolution") +
+ GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload";
+ m_Data.ValidateInputsOutputs(name, 1, 1);
+
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+ const unsigned int depthMultiplier = weightInfo.GetShape()[0];
+
+ //Check for optimisation opportunities.
+ bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3);
+ if (use3x3Optimisation)
+ {
+ m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_DepthwiseConvolutionLayer.get())->configure(
+ &input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier);
+ }
+ else
+ {
+ m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+ &input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier);
+ }
+
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
+}
+
+template<armnn::DataType... dataTypes>
+void ClDepthwiseConvolutionBaseWorkload<dataTypes...>::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
+// Generate known implementations for linker
+template class ClDepthwiseConvolutionBaseWorkload<DataType::Float16, DataType::Float32>;
+template class ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
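
The constructor above stores both layer variants behind std::unique_ptr<arm_compute::IFunction>, which only exposes run(); configure() must therefore be called through the concrete type before the layer is stored. A simplified sketch of that dispatch (hypothetical types):

    #include <memory>

    struct IFunctionSketch
    {
        virtual ~IFunctionSketch() = default;
        virtual void run() = 0;
    };

    struct Depthwise3x3 : IFunctionSketch
    {
        void configure(unsigned /*depthMultiplier*/) { /* specialised 3x3 path */ }
        void run() override {}
    };

    struct DepthwiseGeneric : IFunctionSketch
    {
        void configure(unsigned /*depthMultiplier*/) { /* general path */ }
        void run() override {}
    };

    std::unique_ptr<IFunctionSketch> MakeDepthwise(unsigned kernelW, unsigned kernelH,
                                                   unsigned depthMultiplier)
    {
        if (kernelW == 3 && kernelH == 3)  // same 3x3 fast-path test as above
        {
            auto layer = std::make_unique<Depthwise3x3>();
            layer->configure(depthMultiplier);
            return layer;
        }
        auto layer = std::make_unique<DepthwiseGeneric>();
        layer->configure(depthMultiplier);
        return layer;
    }
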
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp
new file mode 100644
index 0000000000..a879efc89e
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp
@@ -0,0 +1,37 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases);
+
+template<armnn::DataType... dataTypes>
+class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>
+{
+public:
+ using TypedWorkload<DepthwiseConvolution2dQueueDescriptor, dataTypes...>::m_Data;
+
+ ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
+ const WorkloadInfo& info);
+
+protected:
+ std::unique_ptr<arm_compute::IFunction> m_DepthwiseConvolutionLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
index f31c73bc60..96d97ad4ea 100644
--- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.cpp
@@ -4,8 +4,8 @@
//
#include "ClDepthwiseConvolutionFloat32Workload.hpp"
-#include "ClDepthwiseConvolutionHelper.hpp"
-#include "backends/ClTensorHandle.hpp"
+
+#include "backends/ClWorkloadUtils.hpp"
#include "backends/CpuTensorHandle.hpp"
namespace armnn
@@ -14,17 +14,25 @@ namespace armnn
ClDepthwiseConvolutionFloat32Workload::ClDepthwiseConvolutionFloat32Workload(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+ : ClDepthwiseConvolutionBaseWorkload(descriptor, info)
{
- InitClDepthwiseConvolutionWorkload(*this);
+ InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight);
+
+ if (m_BiasTensor)
+ {
+ InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias);
+ }
+
+ m_DepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
}
void ClDepthwiseConvolutionFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionFloat32Workload_Execute");
- BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloat32Workload_Execute");
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
- m_pDepthwiseConvolutionLayer->run();
+ m_DepthwiseConvolutionLayer->run();
}
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
index 8711f0c515..669fd928b5 100644
--- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionFloat32Workload.hpp
@@ -5,29 +5,20 @@
#pragma once
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
#include "backends/ClWorkloadUtils.hpp"
namespace armnn
{
-class ClDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+class ClDepthwiseConvolutionFloat32Workload : public ClDepthwiseConvolutionBaseWorkload<DataType::Float16,
+ DataType::Float32>
{
public:
ClDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info);
void Execute() const override;
-
-private:
- typedef float KernelDataType;
- typedef float BiasDataType;
-
- mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
-
- arm_compute::CLTensor m_KernelTensor;
- arm_compute::CLTensor m_BiasTensor;
-
- template <typename WorkloadType>
- friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
};
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
deleted file mode 100644
index cd7115773d..0000000000
--- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionHelper.hpp
+++ /dev/null
@@ -1,91 +0,0 @@
-//
-// Copyright © 2017 Arm Ltd. All rights reserved.
-// See LICENSE file in the project root for full license information.
-//
-
-#pragma once
-
-#include <armnn/TypesUtils.hpp>
-#include "backends/ClLayerSupport.hpp"
-#include "backends/ArmComputeTensorUtils.hpp"
-#include "backends/ClTensorHandle.hpp"
-
-namespace armnn
-{
-
-template <typename WorkloadType>
-void InitClDepthwiseConvolutionWorkload(WorkloadType& workload)
-{
- using T = typename WorkloadType::KernelDataType;
- using B = typename WorkloadType::BiasDataType;
-
- auto& m_Data = workload.GetData();
- auto& m_KernelTensor = workload.m_KernelTensor;
- auto& m_BiasTensor = workload.m_BiasTensor;
- auto& m_pDepthwiseConvolutionLayer = workload.m_pDepthwiseConvolutionLayer;
-
- auto& weightInfo = m_Data.m_Weight->GetTensorInfo();
-
- std::string reasonIfUnsupported;
- if (!IsClDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
- {
- throw UnimplementedException(reasonIfUnsupported);
- }
-
- armcomputetensorutils::BuildArmComputeTensor(m_KernelTensor, weightInfo);
-
- arm_compute::CLTensor* optionalBias = nullptr;
- if (m_Data.m_Parameters.m_BiasEnabled)
- {
- armcomputetensorutils::BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBias = &m_BiasTensor;
- }
-
- arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
- m_Data.m_Parameters.m_StrideY,
- m_Data.m_Parameters.m_PadLeft,
- m_Data.m_Parameters.m_PadRight,
- m_Data.m_Parameters.m_PadTop,
- m_Data.m_Parameters.m_PadBottom,
- arm_compute::DimensionRoundingType::FLOOR);
-
- std::string name = std::string("ClDepthwiseConvolution") + GetDataTypeName(GetDataType<T>()) + "Workload";
- m_Data.ValidateInputsOutputs(name, 1, 1);
-
- arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
- arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
- //Check for optimisation opportunities.
- bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3);
- if (use3x3Optimisation)
- {
- m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer3x3>();
- static_cast<arm_compute::CLDepthwiseConvolutionLayer3x3*>(m_pDepthwiseConvolutionLayer.get())->configure(
- &input,
- &m_KernelTensor,
- optionalBias,
- &output,
- padStrideInfo);
- }
- else
- {
- m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
- static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_pDepthwiseConvolutionLayer.get())->configure(
- &input,
- &m_KernelTensor,
- optionalBias,
- &output,
- padStrideInfo);
- }
-
- BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
-
- InitialiseArmComputeClTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<T>());
-
- if (optionalBias)
- {
- InitialiseArmComputeClTensorData(*optionalBias, m_Data.m_Bias->template GetConstTensor<B>());
- }
-}
-
-} //namespace armnn
\ No newline at end of file
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
index 7e7c488c74..4852ce8bf9 100644
--- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
@@ -4,28 +4,34 @@
//
#include "ClDepthwiseConvolutionUint8Workload.hpp"
-#include "ClDepthwiseConvolutionHelper.hpp"
-#include "backends/ClTensorHandle.hpp"
+
#include "backends/CpuTensorHandle.hpp"
namespace armnn
{
-
ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+ : ClDepthwiseConvolutionBaseWorkload(descriptor, info)
{
- InitClDepthwiseConvolutionWorkload(*this);
+ InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>());
+
+ if (m_BiasTensor)
+ {
+ InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
+ }
+
+ m_DepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
}
void ClDepthwiseConvolutionUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClDepthwiseConvolutionUint8Workload_Execute");
- BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute");
+ BOOST_ASSERT(m_DepthwiseConvolutionLayer);
- m_pDepthwiseConvolutionLayer->run();
+ m_DepthwiseConvolutionLayer->run();
}
} //namespace armnn
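
The uint8 path uploads the kernel as uint8_t but the bias as int32_t. That follows the usual asymmetric-quantisation convention, worth spelling out since nothing in this file says why the types differ (standard background, not part of this change):

    // real_value = scale * (quantised_value - zero_point); biases use zero_point = 0
    // and bias_scale = input_scale * weight_scale, so dequantising one is a multiply.
    float DequantizeBias(int32_t quantisedBias, float inputScale, float weightScale)
    {
        return static_cast<float>(quantisedBias) * (inputScale * weightScale);
    }
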
diff --git a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
index ee09ff3e58..a4277d405f 100644
--- a/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
@@ -5,29 +5,19 @@
#pragma once
+#include "ClDepthwiseConvolutionBaseWorkload.hpp"
+
#include "backends/ClWorkloadUtils.hpp"
namespace armnn
{
-class ClDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor>
+class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload<DataType::QuantisedAsymm8>
{
public:
ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info);
void Execute() const override;
-
-private:
- typedef uint8_t KernelDataType;
- typedef int32_t BiasDataType;
-
- mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
-
- arm_compute::CLTensor m_KernelTensor;
- arm_compute::CLTensor m_BiasTensor;
-
- template <typename WorkloadType>
- friend void InitClDepthwiseConvolutionWorkload(WorkloadType& workload);
};
} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
index 882da50855..da71c50305 100644
--- a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
{
ClFloorFloat32Workload::ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+ : FloatWorkload<FloorQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClFloorFloat32Workload", 1, 1);
@@ -22,7 +22,7 @@ ClFloorFloat32Workload::ClFloorFloat32Workload(const FloorQueueDescriptor& descr
void ClFloorFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFloorFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloat32Workload_Execute");
m_Layer.run();
}
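
The Float32Workload to FloatWorkload rename recurs throughout this change; the new base presumably aliases a variadic TypedWorkload so that one workload class can accept either FP16 or FP32 tensors. A sketch of the assumed aliases (names inferred from their usage in this diff):

    template <typename QueueDescriptor>
    using FloatWorkload = TypedWorkload<QueueDescriptor,
                                        armnn::DataType::Float16,
                                        armnn::DataType::Float32>;

    template <typename QueueDescriptor>
    using Uint8Workload = TypedWorkload<QueueDescriptor, armnn::DataType::QuantisedAsymm8>;
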
diff --git a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
index 532dd29884..bd7f3032fc 100644
--- a/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClFloorFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class ClFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+class ClFloorFloat32Workload : public FloatWorkload<FloorQueueDescriptor>
{
public:
ClFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
index 5dfab9cbbd..5014dd27ca 100644
--- a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.cpp
@@ -7,47 +7,89 @@
#include "backends/ClTensorHandle.hpp"
#include "backends/CpuTensorHandle.hpp"
#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
namespace armnn
{
using namespace armcomputetensorutils;
+arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+
+ arm_compute::TensorInfo aclBiases;
+ arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiases = BuildArmComputeTensorInfo(biases);
+ optionalAclBiases = &aclBiases;
+ }
+
+ const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
+
+ return arm_compute::CLFullyConnectedLayer::validate(&aclInput,
+ &aclWeights,
+ optionalAclBiases,
+ &aclOutput,
+ fullyConnectedLayerInfo);
+}
+
ClFullyConnectedFloat32Workload::ClFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
- , m_FullyConnected(memoryManager)
+ : FloatWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
+ , m_FullyConnectedLayer(memoryManager)
{
+ m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
- BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
-
- arm_compute::CLTensor* optionalBiasTensor = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBiasTensor = &m_BiasesTensor;
+ m_BiasesTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
}
m_Data.ValidateInputsOutputs("ClFullyConnectedFloat32Workload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
// Construct
- m_FullyConnected.configure(
- &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix);
+ arm_compute::FullyConnectedLayerInfo fc_info;
+ fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
// Allocate
- InitialiseArmComputeClTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>());
+ InitializeArmComputeClTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
- if (optionalBiasTensor)
+ if (m_BiasesTensor)
{
- InitialiseArmComputeClTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>());
+ InitializeArmComputeClTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias);
}
+
+ // Force Compute Library to perform the necessary copying and reshaping, after
+ // which the constant input tensors can be freed as they are no longer needed.
+ m_FullyConnectedLayer.prepare();
+ FreeUnusedTensors();
}
void ClFullyConnectedFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClFullyConnectedFloat32Workload_Execute");
- m_FullyConnected.run();
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedFloat32Workload_Execute");
+ m_FullyConnectedLayer.run();
+}
+
+void ClFullyConnectedFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_WeightsTensor);
+ FreeTensorIfUnused(m_BiasesTensor);
}
} //namespace armnn
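
ClFullyConnectedWorkloadValidate returns ACL's own Status, so layer-support queries can forward ACL's failure reason verbatim. An illustrative caller, assuming the Is*SupportedCl convention used elsewhere in ClLayerSupport (this exact wiring is not part of the diff):

    bool IsFullyConnectedSupportedCl(const TensorInfo& input,
                                     const TensorInfo& output,
                                     const TensorInfo& weights,
                                     const TensorInfo& biases,
                                     const FullyConnectedDescriptor& descriptor,
                                     std::string* reasonIfUnsupported)
    {
        const arm_compute::Status status =
            ClFullyConnectedWorkloadValidate(input, output, weights, biases, descriptor);

        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported != nullptr)
        {
            *reasonIfUnsupported = status.error_description(); // ACL's reason, verbatim.
        }
        return supported;
    }
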
diff --git a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
index c8d1227bda..f580e580c6 100644
--- a/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClFullyConnectedFloat32Workload.hpp
@@ -14,20 +14,29 @@
namespace armnn
{
-class ClFullyConnectedFloat32Workload : public armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>
+arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor);
+
+class ClFullyConnectedFloat32Workload : public armnn::FloatWorkload<armnn::FullyConnectedQueueDescriptor>
{
public:
ClFullyConnectedFloat32Workload(const armnn::FullyConnectedQueueDescriptor& descriptor,
const armnn::WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
- using armnn::Float32Workload<armnn::FullyConnectedQueueDescriptor>::m_Data;
+ using armnn::FloatWorkload<armnn::FullyConnectedQueueDescriptor>::m_Data;
void Execute() const override;
private:
- mutable arm_compute::CLFullyConnectedLayer m_FullyConnected;
- arm_compute::CLTensor m_WeightsTensor;
- arm_compute::CLTensor m_BiasesTensor;
+ mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_WeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_BiasesTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
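
FreeUnusedTensors relies on a FreeTensorIfUnused helper defined elsewhere in the workload utilities. A minimal sketch of what it presumably does: once prepare() has folded a constant tensor into ACL's internal state, ACL marks it unused and its CL buffer can be released early:

    template <typename Tensor>
    void FreeTensorIfUnused(std::unique_ptr<Tensor>& tensor)
    {
        // ITensor::is_used() reports whether ACL still needs the tensor's buffer.
        if (tensor && !tensor->is_used())
        {
            tensor.reset();
        }
    }
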
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
index e15db74ec9..628e38d3da 100644
--- a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.cpp
@@ -12,9 +12,21 @@ namespace armnn
{
using namespace armcomputetensorutils;
+arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+
+ arm_compute::NormalizationLayerInfo normalizationInfo =
+ CreateAclNormalizationLayerInfoForL2Normalization(input);
+
+ return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
ClL2NormalizationFloat32Workload::ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClL2NormalizationFloat32Workload", 1, 1);
@@ -25,7 +37,7 @@ ClL2NormalizationFloat32Workload::ClL2NormalizationFloat32Workload(const L2Norma
void ClL2NormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClL2NormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloat32Workload_Execute");
m_Layer.run();
}
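
CreateAclNormalizationLayerInfoForL2Normalization (from ArmComputeUtils) repurposes ACL's cross-map normalization as a depthwise L2 normalization. A plausible sketch of the parameter choice: with kappa = 0, alpha = 1 and beta = 0.5 the denominator reduces to sqrt(sum(x^2)); the exact derivation is an assumption here:

    inline arm_compute::NormalizationLayerInfo
    CreateAclNormalizationLayerInfoForL2Normalization(const armnn::TensorInfo& tensorInfo)
    {
        const unsigned int depth = tensorInfo.GetShape()[1];

        // ACL requires an odd normalization size; 2 * depth + 1 covers every channel.
        const uint32_t normSize = depth * 2u + 1u;

        const float alpha = 1.0f; // No extra scaling of the reduction.
        const float beta  = 0.5f; // pow(sum, 0.5) in the denominator == sqrt(sum).
        const float kappa = 0.0f; // No offset added to the reduction.

        return arm_compute::NormalizationLayerInfo(
            arm_compute::NormType::CROSS_MAP, normSize, alpha, beta, kappa);
    }
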
diff --git a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
index 848803e2f0..bf898e31f7 100644
--- a/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClL2NormalizationFloat32Workload.hpp
@@ -10,7 +10,10 @@
namespace armnn
{
-class ClL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output);
+
+class ClL2NormalizationFloat32Workload : public FloatWorkload<L2NormalizationQueueDescriptor>
{
public:
ClL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp
new file mode 100644
index 0000000000..db5c303854
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.cpp
@@ -0,0 +1,405 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClLstmFloat32Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ClLayerSupport.hpp"
+#include "arm_compute/runtime/CL/functions/CLLSTMLayer.h"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+ClLstmFloat32Workload::ClLstmFloat32Workload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
+ : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
+{
+ arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
+
+ // Basic parameters
+ m_InputToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo());
+
+ m_InputToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo());
+
+ m_InputToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo());
+
+ m_RecurrentToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo());
+
+ m_RecurrentToCellWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo());
+
+ m_RecurrentToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo());
+
+ m_ForgetGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo());
+
+ m_CellBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo());
+
+ m_OutputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo());
+
+ // For future reference: check the Android NN API for the logic here.
+ if (!m_Data.m_Parameters.m_CifgEnabled)
+ {
+ m_InputToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo());
+
+ m_RecurrentToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo());
+
+ m_CellToInputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_CellToInputWeights != nullptr)
+ {
+ BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo());
+ }
+
+ m_InputGateBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo());
+
+ lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(),
+ m_RecurrentToInputWeightsTensor.get(),
+ m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr,
+ m_InputGateBiasTensor.get());
+ }
+
+ if (m_Data.m_Parameters.m_ProjectionEnabled)
+ {
+ m_ProjectionWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo());
+
+ m_ProjectionBiasTensor = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_ProjectionBias != nullptr)
+ {
+ BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo());
+ }
+
+ lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(),
+ m_Data.m_ProjectionBias != nullptr ? m_ProjectionBiasTensor.get() : nullptr);
+ }
+
+ if (m_Data.m_Parameters.m_PeepholeEnabled)
+ {
+ m_CellToForgetWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo());
+
+ m_CellToOutputWeightsTensor = std::make_unique<arm_compute::CLTensor>();
+ BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo());
+
+ lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get());
+ }
+
+ const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ const arm_compute::ICLTensor& output_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+ const arm_compute::ICLTensor& cell_state_in = static_cast<IClTensorHandle*>(m_Data.m_Inputs[2])->GetTensor();
+
+ arm_compute::ICLTensor& output_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
+ arm_compute::ICLTensor& cell_state_out = static_cast<IClTensorHandle*>(m_Data.m_Outputs[2])->GetTensor();
+ arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[3])->GetTensor();
+
+ // Get the batch_size and the num_units from the cellStateIn dimensions
+ const TensorInfo& cellStateInInfo = info.m_InputTensorInfos[2];
+ const unsigned int batch_size = boost::numeric_cast<unsigned int>(cellStateInInfo.GetShape()[0]);
+ const unsigned int num_units = boost::numeric_cast<unsigned int>(cellStateInInfo.GetShape()[1]);
+
+ m_ScratchBuffer = std::make_unique<arm_compute::CLTensor>();
+ if (m_Data.m_Parameters.m_CifgEnabled)
+ {
+ // With CIFG the input gate is derived from the forget gate, so the scratch
+ // buffer only needs 2D shape [num_units * 3, batch_size]
+ armnn::TensorInfo scratchBufferCifg({ batch_size, num_units * 3 }, DataType::Float32);
+ BuildArmComputeTensor(*m_ScratchBuffer, scratchBufferCifg);
+ }
+ else
+ {
+ // scratch_buffer [num_units * 4, batch_size] without CIFG
+ armnn::TensorInfo scratchBufferNoCifg({ batch_size, num_units * 4 }, DataType::Float32);
+ BuildArmComputeTensor(*m_ScratchBuffer, scratchBufferNoCifg);
+ }
+
+ float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell;
+ float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj;
+
+ // To prepare the ActivationLayerInfo object, five cases need to be considered.
+ arm_compute::ActivationLayerInfo activationLayerInfo;
+ if (m_Data.m_Parameters.m_ActivationFunc == 0)
+ {
+ // no activation, do nothing
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 1)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 3)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 4)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+ }
+ else if (m_Data.m_Parameters.m_ActivationFunc == 6)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+ }
+ else
+ {
+ throw armnn::Exception("Unsupported activation function.");
+ }
+
+
+ m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(),
+ m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+ m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+ m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(),
+ &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+ &cell_state_out, &output, lstm_param, activationLayerInfo,
+ cell_threshold, projection_threshold);
+
+ armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);
+
+ InitialiseArmComputeClTensorData(*m_InputToForgetWeightsTensor,
+ m_Data.m_InputToForgetWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_InputToCellWeightsTensor,
+ m_Data.m_InputToCellWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_InputToOutputWeightsTensor,
+ m_Data.m_InputToOutputWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor,
+ m_Data.m_RecurrentToForgetWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_RecurrentToCellWeightsTensor,
+ m_Data.m_RecurrentToCellWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor,
+ m_Data.m_RecurrentToOutputWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_ForgetGateBiasTensor,
+ m_Data.m_ForgetGateBias->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_CellBiasTensor,
+ m_Data.m_CellBias->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_OutputGateBiasTensor,
+ m_Data.m_OutputGateBias->GetConstTensor<float>());
+
+ if (!m_Data.m_Parameters.m_CifgEnabled)
+ {
+ InitialiseArmComputeClTensorData(*m_InputToInputWeightsTensor,
+ m_Data.m_InputToInputWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_RecurrentToInputWeightsTensor,
+ m_Data.m_RecurrentToInputWeights->GetConstTensor<float>());
+ if (m_Data.m_CellToInputWeights != nullptr)
+ {
+ InitialiseArmComputeClTensorData(*m_CellToInputWeightsTensor,
+ m_Data.m_CellToInputWeights->GetConstTensor<float>());
+ }
+ InitialiseArmComputeClTensorData(*m_InputGateBiasTensor,
+ m_Data.m_InputGateBias->GetConstTensor<float>());
+ }
+
+ if (m_Data.m_Parameters.m_ProjectionEnabled)
+ {
+ InitialiseArmComputeClTensorData(*m_ProjectionWeightsTensor,
+ m_Data.m_ProjectionWeights->GetConstTensor<float>());
+ if (m_Data.m_ProjectionBias != nullptr)
+ {
+ InitialiseArmComputeClTensorData(*m_ProjectionBiasTensor,
+ m_Data.m_ProjectionBias->GetConstTensor<float>());
+ }
+ }
+
+ if (m_Data.m_Parameters.m_PeepholeEnabled)
+ {
+ InitialiseArmComputeClTensorData(*m_CellToForgetWeightsTensor,
+ m_Data.m_CellToForgetWeights->GetConstTensor<float>());
+ InitialiseArmComputeClTensorData(*m_CellToOutputWeightsTensor,
+ m_Data.m_CellToOutputWeights->GetConstTensor<float>());
+ }
+
+ // Force Compute Library to perform the necessary copying and reshaping, after
+ // which the constant input tensors can be freed as they are no longer needed.
+ m_LstmLayer.prepare();
+ FreeUnusedTensors();
+}
+
+void ClLstmFloat32Workload::Execute() const
+{
+ m_LstmLayer.run();
+}
+
+arm_compute::Status ClLstmFloat32WorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn,
+ const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+ const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+ const TensorInfo& output, const LstmDescriptor& descriptor,
+ const TensorInfo& inputToForgetWeights,
+ const TensorInfo& inputToCellWeights,
+ const TensorInfo& inputToOutputWeights,
+ const TensorInfo& recurrentToForgetWeights,
+ const TensorInfo& recurrentToCellWeights,
+ const TensorInfo& recurrentToOutputWeights,
+ const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+ const TensorInfo& outputGateBias,
+ const TensorInfo* inputToInputWeights,
+ const TensorInfo* recurrentToInputWeights,
+ const TensorInfo* cellToInputWeights,
+ const TensorInfo* inputGateBias,
+ const TensorInfo* projectionWeights,
+ const TensorInfo* projectionBias,
+ const TensorInfo* cellToForgetWeights,
+ const TensorInfo* cellToOutputWeights)
+{
+ arm_compute::LSTMParams<arm_compute::ITensorInfo> lstm_params_info;
+
+ // The inputs and the outputs
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn);
+ const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn);
+ const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer);
+ const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut);
+ const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+ // Basic parameters
+ const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights);
+ const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights);
+ const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights);
+ const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToForgetWeights);
+ const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToCellWeights);
+ const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo
+ = BuildArmComputeTensorInfo(recurrentToOutputWeights);
+ const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias);
+ const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias);
+ const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias);
+
+ arm_compute::TensorInfo aclInputToInputWeightsInfo;
+ arm_compute::TensorInfo aclRecurrentToInputWeightsInfo;
+ arm_compute::TensorInfo aclCellToInputWeightsInfo;
+ arm_compute::TensorInfo aclInputGateBiasInfo;
+ arm_compute::TensorInfo aclProjectionWeightsInfo;
+ arm_compute::TensorInfo aclProjectionBiasInfo;
+ arm_compute::TensorInfo aclCellToForgetWeightsInfo;
+ arm_compute::TensorInfo aclCellToOutputWeightsInfo;
+
+ if (!descriptor.m_CifgEnabled)
+ {
+ const armnn::TensorInfo& inputToInputWInfo = *inputToInputWeights;
+ aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo);
+ const armnn::TensorInfo& recurrentToInputWInfo = *recurrentToInputWeights;
+ aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo);
+
+ if (cellToInputWeights != nullptr)
+ {
+ const armnn::TensorInfo& cellToInputWInfo = *cellToInputWeights;
+ aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo);
+ }
+ const armnn::TensorInfo& inputGateBiasInfo = *inputGateBias;
+ aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo);
+ lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo,
+ cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo : nullptr,
+ &aclInputGateBiasInfo);
+ }
+
+ if (descriptor.m_ProjectionEnabled)
+ {
+ const armnn::TensorInfo& projectionWInfo = *projectionWeights;
+ aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo);
+
+ if (projectionBias != nullptr)
+ {
+ const armnn::TensorInfo& projectionBiasInfo = *projectionBias;
+ aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo);
+ }
+ lstm_params_info.set_projection_params(&aclProjectionWeightsInfo,
+ projectionBias != nullptr ? &aclProjectionBiasInfo : nullptr);
+ }
+
+ if (descriptor.m_PeepholeEnabled)
+ {
+ const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights;
+ aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo);
+ const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights;
+ aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo);
+ lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo);
+ }
+
+ float cell_threshold = descriptor.m_ClippingThresCell;
+ float projection_threshold = descriptor.m_ClippingThresProj;
+
+ // To prepare the ActivationLayerInfo object, five cases need to be considered.
+ arm_compute::ActivationLayerInfo activationLayerInfo;
+ if (descriptor.m_ActivationFunc == 0)
+ {
+ // no activation, do nothing
+ }
+ else if (descriptor.m_ActivationFunc == 1)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
+ }
+ else if (descriptor.m_ActivationFunc == 3)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0);
+ }
+ else if (descriptor.m_ActivationFunc == 4)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0);
+ }
+ else if (descriptor.m_ActivationFunc == 6)
+ {
+ activationLayerInfo = arm_compute::ActivationLayerInfo(
+ arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
+ }
+ else
+ {
+ throw armnn::Exception("Unsupported activation function.");
+ }
+
+ return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo,
+ &aclInputToCellWeightsInfo,
+ &aclInputToOutputWeightsInfo,
+ &aclRecurrentToForgetWeightsInfo,
+ &aclRecurrentToCellWeightsInfo,
+ &aclRecurrentToOutputWeightsInfo,
+ &aclForgetGateBiasInfo,
+ &aclCellBiasInfo,
+ &aclOutputGateBiasInfo,
+ &aclOutputStateInInfo, &aclCellStateInInfo,
+ &aclScratchBufferInfo, &aclOutputStateOutInfo,
+ &aclCellStateOutInfo, &aclOutputInfo,
+ lstm_params_info, activationLayerInfo,
+ cell_threshold, projection_threshold);
+}
+
+void ClLstmFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_InputToInputWeightsTensor);
+ FreeTensorIfUnused(m_InputToForgetWeightsTensor);
+ FreeTensorIfUnused(m_InputToCellWeightsTensor);
+ FreeTensorIfUnused(m_InputToOutputWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToInputWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToCellWeightsTensor);
+ FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor);
+ FreeTensorIfUnused(m_CellToInputWeightsTensor);
+ FreeTensorIfUnused(m_CellToForgetWeightsTensor);
+ FreeTensorIfUnused(m_CellToOutputWeightsTensor);
+ FreeTensorIfUnused(m_InputGateBiasTensor);
+ FreeTensorIfUnused(m_ForgetGateBiasTensor);
+ FreeTensorIfUnused(m_CellBiasTensor);
+ FreeTensorIfUnused(m_OutputGateBiasTensor);
+ FreeTensorIfUnused(m_ProjectionWeightsTensor);
+ FreeTensorIfUnused(m_ProjectionBiasTensor);
+ FreeTensorIfUnused(m_ScratchBuffer);
+}
+
+} //namespace armnn
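
The activation ladder above appears twice, once in the constructor and once in the validate function. A hypothetical shared helper (not part of this change) makes explicit the Android NN activation encoding it presumably implements: 0 = none, 1 = ReLU, 3 = ReLU6, 4 = tanh, 6 = sigmoid:

    arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunc)
    {
        switch (activationFunc)
        {
            case 0: // No activation: a default-constructed info is treated as disabled.
                return arm_compute::ActivationLayerInfo();
            case 1:
                return arm_compute::ActivationLayerInfo(
                    arm_compute::ActivationLayerInfo::ActivationFunction::RELU);
            case 3:
                return arm_compute::ActivationLayerInfo(
                    arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0f);
            case 4:
                return arm_compute::ActivationLayerInfo(
                    arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f);
            case 6:
                return arm_compute::ActivationLayerInfo(
                    arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC);
            default:
                throw armnn::Exception("Unsupported activation function.");
        }
    }
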
diff --git a/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.hpp
new file mode 100644
index 0000000000..e2358ad10d
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClLstmFloat32Workload.hpp
@@ -0,0 +1,67 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+
+namespace armnn
+{
+
+class ClLstmFloat32Workload : public FloatWorkload<LstmQueueDescriptor>
+{
+public:
+ ClLstmFloat32Workload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+
+private:
+ mutable arm_compute::CLLSTMLayer m_LstmLayer;
+
+ std::unique_ptr<arm_compute::CLTensor> m_InputToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToCellWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToCellWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_RecurrentToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToInputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToForgetWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellToOutputWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_InputGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ForgetGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_CellBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_OutputGateBiasTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ProjectionWeightsTensor;
+ std::unique_ptr<arm_compute::CLTensor> m_ProjectionBiasTensor;
+
+ std::unique_ptr<arm_compute::CLTensor> m_ScratchBuffer;
+
+ void FreeUnusedTensors();
+};
+
+arm_compute::Status ClLstmFloat32WorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn,
+ const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer,
+ const TensorInfo& outputStateOut, const TensorInfo& cellStateOut,
+ const TensorInfo& output, const LstmDescriptor& descriptor,
+ const TensorInfo& inputToForgetWeights,
+ const TensorInfo& inputToCellWeights,
+ const TensorInfo& inputToOutputWeights,
+ const TensorInfo& recurrentToForgetWeights,
+ const TensorInfo& recurrentToCellWeights,
+ const TensorInfo& recurrentToOutputWeights,
+ const TensorInfo& forgetGateBias, const TensorInfo& cellBias,
+ const TensorInfo& outputGateBias,
+ const TensorInfo* inputToInputWeights,
+ const TensorInfo* recurrentToInputWeights,
+ const TensorInfo* cellToInputWeights,
+ const TensorInfo* inputGateBias,
+ const TensorInfo* projectionWeights,
+ const TensorInfo* projectionBias,
+ const TensorInfo* cellToForgetWeights,
+ const TensorInfo* cellToOutputWeights);
+} //namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
index 4d2d708a0e..89e7690a36 100644
--- a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.cpp
@@ -11,7 +11,7 @@ namespace armnn
void ClMergerFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloat32Workload_Execute");
ClBaseMergerWorkload::Execute();
}
diff --git a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
index 9808d30ccf..3cafa23c1e 100644
--- a/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClMergerFloat32Workload.hpp
@@ -10,10 +10,10 @@
namespace armnn
{
-class ClMergerFloat32Workload : public ClBaseMergerWorkload<armnn::DataType::Float32>
+class ClMergerFloat32Workload : public ClBaseMergerWorkload<DataType::Float16, DataType::Float32>
{
public:
- using ClBaseMergerWorkload<armnn::DataType::Float32>::ClBaseMergerWorkload;
+ using ClBaseMergerWorkload<DataType::Float16, DataType::Float32>::ClBaseMergerWorkload;
virtual void Execute() const override;
};
diff --git a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
index 94a1d3c593..551135b7da 100644
--- a/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClMergerUint8Workload.cpp
@@ -11,7 +11,7 @@ namespace armnn
void ClMergerUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMergerUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute");
ClBaseMergerWorkload<DataType::QuantisedAsymm8>::Execute();
}
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
index 405d109aa1..7aa33146f3 100644
--- a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.cpp
@@ -10,9 +10,29 @@
namespace armnn
{
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+ // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+ // ignored for F32 tensors.
+ return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1,
+ &aclInput2,
+ &aclOutput,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+
ClMultiplicationFloat32Workload::ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClMultiplicationFloat32Workload", 2, 1);
@@ -30,9 +50,9 @@ ClMultiplicationFloat32Workload::ClMultiplicationFloat32Workload(const Multiplic
void ClMultiplicationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClMultiplicationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloat32Workload_Execute");
- // Execute the layer
+ // Executes the layer.
m_PixelWiseMultiplication.run();
}
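
For symmetry with the validate function, the constructor's configure call (elided from this hunk) presumably passes the same scale and policies. An illustrative wrapper, with the TO_ZERO requirement from the comment above made explicit:

    void ConfigurePixelWiseMultiplication(arm_compute::CLPixelWiseMultiplication& layer,
                                          arm_compute::ICLTensor& input0,
                                          arm_compute::ICLTensor& input1,
                                          arm_compute::ICLTensor& output)
    {
        // TO_ZERO is the only rounding policy configure() accepts here, even though
        // it is effectively ignored for F32 tensors.
        layer.configure(&input0, &input1, &output,
                        1.0f, // scale
                        arm_compute::ConvertPolicy::SATURATE,
                        arm_compute::RoundingPolicy::TO_ZERO);
    }
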
diff --git a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
index 8e387118e8..0d6199047d 100644
--- a/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClMultiplicationFloat32Workload.hpp
@@ -9,12 +9,17 @@
namespace armnn
{
-class ClMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+
+arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
+
+class ClMultiplicationFloat32Workload : public FloatWorkload<MultiplicationQueueDescriptor>
{
public:
ClMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
- using Float32Workload<MultiplicationQueueDescriptor>::Float32Workload;
+ using FloatWorkload<MultiplicationQueueDescriptor>::FloatWorkload;
void Execute() const override;
private:
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
index a163ec2883..d23d6e11bd 100644
--- a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.cpp
@@ -27,7 +27,7 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, con
ClNormalizationFloat32Workload::ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClNormalizationFloat32Workload", 1, 1);
@@ -42,7 +42,7 @@ ClNormalizationFloat32Workload::ClNormalizationFloat32Workload(const Normalizati
void ClNormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClNormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloat32Workload_Execute");
m_NormalizationLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
index cbd5fa92a9..e8ab0b9a18 100644
--- a/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClNormalizationFloat32Workload.hpp
@@ -14,7 +14,7 @@ arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const NormalizationDescriptor& descriptor);
-class ClNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+class ClNormalizationFloat32Workload : public FloatWorkload<NormalizationQueueDescriptor>
{
public:
ClNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
index 3147e95b2e..3c132cb8f8 100644
--- a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.cpp
@@ -24,10 +24,10 @@ arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descripto
return arm_compute::Status{};
}
-template <armnn::DataType DataType>
-ClPermuteWorkload<DataType>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+template <armnn::DataType... DataTypes>
+ClPermuteWorkload<DataTypes...>::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+ : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info)
{
using armcomputetensorutils::BuildArmComputePermutationVector;
@@ -37,18 +37,18 @@ ClPermuteWorkload<DataType>::ClPermuteWorkload(const PermuteQueueDescriptor& des
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
- // Run the layer
+ // Run the layer.
m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
}
-template <armnn::DataType DataType>
-void ClPermuteWorkload<DataType>::Execute() const
+template <armnn::DataType... DataTypes>
+void ClPermuteWorkload<DataTypes...>::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL(GetName() + "_Execute");
m_PermuteFunction.run();
}
-template class ClPermuteWorkload<DataType::Float32>;
+template class ClPermuteWorkload<DataType::Float16, DataType::Float32>;
template class ClPermuteWorkload<DataType::QuantisedAsymm8>;
} // namespace armnn
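
The switch from a single DataType parameter to a pack follows the TypedWorkload change underpinning this whole diff. A minimal sketch of the data-type check such a variadic TypedWorkload could perform (C++11-compatible; the actual implementation may differ):

    #include <algorithm>
    #include <array>

    template <typename QueueDescriptor, armnn::DataType... DataTypes>
    class TypedWorkload : public BaseWorkload<QueueDescriptor>
    {
    public:
        TypedWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info)
            : BaseWorkload<QueueDescriptor>(descriptor, info)
        {
            const std::array<armnn::DataType, sizeof...(DataTypes)> supported = {{DataTypes...}};
            const armnn::DataType actual = info.m_InputTensorInfos[0].GetDataType();

            // The workload is valid if the input's type matches any type in the pack.
            BOOST_ASSERT_MSG(
                std::find(supported.begin(), supported.end(), actual) != supported.end(),
                "Trying to create workload with incorrect type");
        }
    };
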
diff --git a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
index 430c59524e..c8726bc2c6 100644
--- a/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClPermuteWorkload.hpp
@@ -7,6 +7,7 @@
#include "backends/Workload.hpp"
#include "backends/WorkloadData.hpp"
+#include "backends/ClWorkloadUtils.hpp"
#include <armnn/TypesUtils.hpp>
#include <arm_compute/runtime/CL/functions/CLPermute.h>
@@ -18,13 +19,13 @@ namespace armnn
arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor);
-template <armnn::DataType DataType>
-class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+template<armnn::DataType... DataTypes>
+class ClPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
{
public:
static const std::string& GetName()
{
- static const std::string name = std::string("ClPermute") + GetDataTypeName(DataType) + "Workload";
+ static const std::string name = "ClPermuteWorkload";
return name;
}
@@ -32,11 +33,11 @@ public:
void Execute() const override;
private:
- using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data;
mutable arm_compute::CLPermute m_PermuteFunction;
};
-using ClPermuteFloat32Workload = ClPermuteWorkload<DataType::Float32>;
+using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>;
using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;
-} //namespace armnn
+} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
index dbdc06f174..6b8a230912 100644
--- a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
@@ -25,10 +25,10 @@ arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
}
-template <armnn::DataType dataType>
-ClPooling2dBaseWorkload<dataType>::ClPooling2dBaseWorkload(
+template <armnn::DataType... dataTypes>
+ClPooling2dBaseWorkload<dataTypes...>::ClPooling2dBaseWorkload(
const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
- : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info)
+ : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info)
{
m_Data.ValidateInputsOutputs(name, 1, 1);
@@ -37,11 +37,11 @@ ClPooling2dBaseWorkload<dataType>::ClPooling2dBaseWorkload(
arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
- // Run the layer
+ // Run the layer.
m_PoolingLayer.configure(&input, &output, layerInfo);
}
-template class ClPooling2dBaseWorkload<DataType::Float32>;
+template class ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>;
template class ClPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
}
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
index 828f000505..aea32c9e86 100644
--- a/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
@@ -14,12 +14,12 @@ arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const Pooling2dDescriptor& descriptor);
-// Base class template providing an implementation of the Pooling2d layer common to all data types
-template <armnn::DataType dataType>
-class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType>
+// Base class template providing an implementation of the Pooling2d layer common to all data types.
+template <armnn::DataType... dataTypes>
+class ClPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>
{
public:
- using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data;
+ using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data;
ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
const std::string& name);
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
index a7f5855b8a..3a5b8ca526 100644
--- a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.cpp
@@ -10,13 +10,13 @@ namespace armnn
ClPooling2dFloat32Workload::ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : ClPooling2dBaseWorkload<DataType::Float32>(descriptor, info, "ClPooling2dFloat32Workload")
+ : ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>(descriptor, info, "ClPooling2dFloat32Workload")
{
}
void ClPooling2dFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloat32Workload_Execute");
m_PoolingLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
index 3456a2cff8..ad189bdb52 100644
--- a/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class ClPooling2dFloat32Workload : public ClPooling2dBaseWorkload<DataType::Float32>
+class ClPooling2dFloat32Workload : public ClPooling2dBaseWorkload<DataType::Float16, DataType::Float32>
{
public:
ClPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
index 2d2109e252..94cf753f5a 100644
--- a/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
@@ -16,7 +16,7 @@ ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescripto
void ClPooling2dUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClPooling2dUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute");
m_PoolingLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
index 7b4ad4415b..05fba222ac 100644
--- a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.cpp
@@ -11,7 +11,7 @@ namespace armnn
{
ClReshapeFloat32Workload::ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Float32Workload<ReshapeQueueDescriptor>(descriptor, info)
+ : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClReshapeFloat32Workload", 1, 1);
@@ -23,7 +23,7 @@ ClReshapeFloat32Workload::ClReshapeFloat32Workload(const ReshapeQueueDescriptor&
void ClReshapeFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloat32Workload_Execute");
m_Layer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
index e344ee08ad..0eb4d08da0 100644
--- a/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClReshapeFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class ClReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+class ClReshapeFloat32Workload : public FloatWorkload<ReshapeQueueDescriptor>
{
public:
ClReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
index 36cc1dec17..050fb9aa33 100644
--- a/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClReshapeUint8Workload.cpp
@@ -21,7 +21,7 @@ ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& des
void ClReshapeUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClReshapeUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute");
m_Layer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
index d71011a2e3..abef682611 100644
--- a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.cpp
@@ -14,7 +14,7 @@ namespace armnn
ClResizeBilinearFloat32Workload::ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<ResizeBilinearQueueDescriptor>(descriptor, info)
+ : FloatWorkload<ResizeBilinearQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("ClResizeBilinearFloat32Workload", 1, 1);
@@ -28,7 +28,7 @@ ClResizeBilinearFloat32Workload::ClResizeBilinearFloat32Workload(const ResizeBil
void ClResizeBilinearFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClResizeBilinearFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloat32Workload_Execute");
m_ResizeBilinearLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
index 5f70e71619..81c0566bb3 100644
--- a/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClResizeBilinearFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class ClResizeBilinearFloat32Workload : public Float32Workload<ResizeBilinearQueueDescriptor>
+class ClResizeBilinearFloat32Workload : public FloatWorkload<ResizeBilinearQueueDescriptor>
{
public:
ClResizeBilinearFloat32Workload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp
new file mode 100644
index 0000000000..cd3107cfe1
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp
@@ -0,0 +1,28 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "ClSoftmaxBaseWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output)
+{
+ // NOTE: We report 4D Softmax as unsupported until full support is added to ACL
+ if (input.GetShape().GetNumDimensions() >= 4u)
+ {
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported");
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo);
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp
new file mode 100644
index 0000000000..e0113134af
--- /dev/null
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp
@@ -0,0 +1,16 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output);
+
+} // namespace armnn
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
index 1d05172b42..08247bc593 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.cpp
@@ -12,7 +12,7 @@ namespace armnn
ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
, m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("ClSoftmaxFloat32Workload", 1, 1);
@@ -24,7 +24,7 @@ ClSoftmaxFloat32Workload::ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor&
void ClSoftmaxFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloat32Workload_Execute");
m_SoftmaxLayer.run();
}
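
The softmax constructor continues to take a shared arm_compute::MemoryManagerOnDemand, so intermediate CL buffers can be pooled across every workload built from the same factory. A sketch of how such a manager is commonly assembled in ACL; this is assumed setup shown for illustration, not code from this patch:

    #include <memory>

    #include "arm_compute/runtime/BlobLifetimeManager.h"
    #include "arm_compute/runtime/MemoryManagerOnDemand.h"
    #include "arm_compute/runtime/PoolManager.h"

    // Assumed setup: pair a lifetime manager with a pool manager, then hand the
    // resulting memory manager to each workload that wants pooled buffers.
    std::shared_ptr<arm_compute::MemoryManagerOnDemand> CreateMemoryManagerExample()
    {
        auto lifetimeManager = std::make_shared<arm_compute::BlobLifetimeManager>();
        auto poolManager     = std::make_shared<arm_compute::PoolManager>();
        return std::make_shared<arm_compute::MemoryManagerOnDemand>(lifetimeManager, poolManager);
    }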
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
index cf5c45ac6f..6cad59800b 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxFloat32Workload.hpp
@@ -14,7 +14,7 @@
namespace armnn
{
-class ClSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+class ClSoftmaxFloat32Workload : public FloatWorkload<SoftmaxQueueDescriptor>
{
public:
ClSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
diff --git a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
index ee9ab4754b..3cd9a6a5ec 100644
--- a/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -33,7 +33,7 @@ ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& des
void ClSoftmaxUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSoftmaxUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute");
m_SoftmaxLayer.run();
}
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
index 6221d56766..8a622c6caf 100644
--- a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void ClSplitterFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloat32Workload_Execute");
ClBaseSplitterWorkload::Execute();
}
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
index cfc7eaa3c2..affa9f840f 100644
--- a/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
+++ b/src/armnn/backends/ClWorkloads/ClSplitterFloat32Workload.hpp
@@ -10,10 +10,10 @@
namespace armnn
{
-class ClSplitterFloat32Workload : public ClBaseSplitterWorkload<DataType::Float32>
+class ClSplitterFloat32Workload : public ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>
{
public:
- using ClBaseSplitterWorkload<DataType::Float32>::ClBaseSplitterWorkload;
+ using ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>::ClBaseSplitterWorkload;
virtual void Execute() const override;
};
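
ClBaseSplitterWorkload is now instantiated with a parameter pack of DataTypes rather than a single one, the same Float16-plus-Float32 widening applied to FloatWorkload earlier in the patch. A sketch of what the variadic base might look like, assuming TypedWorkload validates tensor data types in its constructor:

    // Sketch under assumptions: the base forwards to TypedWorkload, which is
    // presumed to reject tensors whose DataType is not in the listed pack.
    template <armnn::DataType... DataTypes>
    class ClBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...>
    {
    public:
        using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload;

        void Execute() const override
        {
            // The CL splitter is typically realized as sub-tensor views set up
            // at graph-build time, so there may be nothing to run here.
        }
    };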
diff --git a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
index 3aa470894c..d2d25495e0 100644
--- a/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
+++ b/src/armnn/backends/ClWorkloads/ClSplitterUint8Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void ClSplitterUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "ClSplitterUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute");
ClBaseSplitterWorkload::Execute();
}