Diffstat (limited to 'src/armnn/backends/NeonWorkloads')
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp  27
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp  7
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp  13
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp  20
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp  7
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp  25
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp  11
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp  11
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp  75
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp  20
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp  4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp  41
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp  26
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp  43
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp  26
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp  69
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp  13
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp  7
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp  8
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp  46
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp  19
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp  41
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp  8
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp  39
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp  6
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp  4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp  67
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp  15
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp  16
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp  5
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp  22
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp  20
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp  4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp  23
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp  5
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp  23
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp  6
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp  16
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp  13
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp  8
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp  8
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp  5
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp  3
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp  4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp  30
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp  17
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp  6
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp  2
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp  4
-rw-r--r--  src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp  2
61 files changed, 764 insertions, 198 deletions
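The recurring change in the diffs below is the move from single-type workload bases (e.g. Float32Workload<Desc>, TypedWorkload<Desc, DataType>) to variadic instantiations such as FloatWorkload, which covers both Float16 and Float32. The following is a minimal illustrative sketch of that variadic TypedWorkload pattern; the names and bodies are assumptions for illustration only, not the ArmNN definitions.

    // Illustrative sketch only -- not the ArmNN source. It shows how a
    // variadic TypedWorkload lets one workload class accept several tensor
    // data types, which is what the FloatWorkload classes in this diff use.
    #include <initializer_list>
    #include <stdexcept>

    enum class DataType { Float16, Float32, QuantisedAsymm8 };

    struct WorkloadInfo { DataType inputDataType; };

    template <typename QueueDescriptor, DataType... DataTypes>
    class TypedWorkload
    {
    public:
        TypedWorkload(const QueueDescriptor&, const WorkloadInfo& info)
        {
            // Accept the workload only if the input type is one of DataTypes.
            bool supported = false;
            for (DataType type : {DataTypes...})
            {
                supported = supported || (type == info.inputDataType);
            }
            if (!supported)
            {
                throw std::invalid_argument("Unsupported data type for this workload");
            }
        }
        virtual ~TypedWorkload() = default;
        virtual void Execute() const = 0;
    };

    // In this sketch, a FloatWorkload is simply a TypedWorkload instantiated
    // for both floating point types, matching the Float16/Float32 pairs below.
    template <typename QueueDescriptor>
    using FloatWorkload = TypedWorkload<QueueDescriptor, DataType::Float16, DataType::Float32>;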
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp
index 39e55d5761..711bfceeaf 100644
--- a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp
@@ -9,9 +9,32 @@
namespace armnn
{
+
+arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ const arm_compute::ActivationLayerInfo activationLayerInfo =
+ ConvertActivationDescriptorToAclActivationLayerInfo(descriptor);
+
+ if (input.GetDataType() == DataType::QuantisedAsymm8 &&
+ activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC)
+ {
+ return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR,
+ "Neon: Logistic Activations unsupported with QAsymm8 data type."};
+ }
+
+ return arm_compute::NEActivationLayer::validate(&aclInput,
+ &aclOutput,
+ activationLayerInfo);
+}
+
NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<ActivationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<ActivationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonActivationFloat32Workload", 1, 1);
@@ -26,7 +49,7 @@ NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQue
void NeonActivationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloat32Workload_Execute");
m_ActivationLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp
index 6fa83ea2f6..0d26b3b39f 100644
--- a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp
@@ -9,7 +9,12 @@
namespace armnn
{
-class NeonActivationFloat32Workload : public Float32Workload<ActivationQueueDescriptor>
+
+arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const ActivationDescriptor& descriptor);
+
+class NeonActivationFloat32Workload : public FloatWorkload<ActivationQueueDescriptor>
{
public:
NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
index 27c37e9425..f2e42338b2 100644
--- a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
@@ -13,15 +13,8 @@ NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDe
const WorkloadInfo& info)
: Uint8Workload<ActivationQueueDescriptor>(descriptor, info)
{
-
- std::string reasonIfUnsupported;
- if (!IsNeonActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters))
- {
- throw InvalidArgumentException(reasonIfUnsupported);
- }
-
- // Only BoundedReLu is supported (see IsNeonActivationUint8Supported)
- arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function);
+ arm_compute::ActivationLayerInfo layerInfo(activation,
m_Data.m_Parameters.m_A,
m_Data.m_Parameters.m_B);
@@ -35,7 +28,7 @@ NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDe
void NeonActivationUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute");
m_ActivationLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp
index d1fb64093d..f26e42aff9 100644
--- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp
@@ -4,14 +4,30 @@
//
#include "NeonAdditionFloat32Workload.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
#include "backends/CpuTensorHandle.hpp"
namespace armnn
{
+arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::NEArithmeticAddition::validate(&aclInput0,
+ &aclInput1,
+ &aclOutput,
+ arm_compute::ConvertPolicy::SATURATE);
+}
+
+
NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<AdditionQueueDescriptor>(descriptor, info)
+ : FloatWorkload<AdditionQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonAdditionFloat32Workload", 2, 1);
@@ -24,7 +40,7 @@ NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDesc
void NeonAdditionFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonAdditionFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloat32Workload_Execute");
m_AddLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp
index 5b75b502a3..dae66bb69d 100644
--- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp
@@ -9,7 +9,12 @@
namespace armnn
{
-class NeonAdditionFloat32Workload : public Float32Workload<AdditionQueueDescriptor>
+
+arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
+
+class NeonAdditionFloat32Workload : public FloatWorkload<AdditionQueueDescriptor>
{
public:
NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
index 247ebfc5dd..e0ad408424 100644
--- a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
@@ -5,23 +5,27 @@
#pragma once
+#include <arm_compute/core/Types.h>
#include <backends/ArmComputeTensorUtils.hpp>
#include <backends/CpuTensorHandle.hpp>
#include <backends/NeonTensorHandle.hpp>
+#include <backends/NeonWorkloadUtils.hpp>
#include <backends/Workload.hpp>
+#include <Half.hpp>
#include <boost/cast.hpp>
+#include "Half.hpp"
namespace armnn
{
-// Base class template providing an implementation of the Constant layer common to all data types
-template <armnn::DataType DataFormat>
-class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormat>
+// Base class template providing an implementation of the Constant layer common to all data types.
+template <armnn::DataType... DataFormats>
+class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormats...>
{
public:
NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
- : TypedWorkload<ConstantQueueDescriptor, DataFormat>(descriptor, info)
+ : TypedWorkload<ConstantQueueDescriptor, DataFormats...>(descriptor, info)
, m_RanOnce(false)
{
}
@@ -41,15 +45,22 @@ public:
BOOST_ASSERT(data.m_LayerOutput != nullptr);
arm_compute::ITensor& output =
boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor();
+ arm_compute::DataType computeDataType =
+ boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType();
- switch (DataFormat)
+ switch (computeDataType)
{
- case DataType::Float32:
+ case arm_compute::DataType::F16:
+ {
+ CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<Half>(), output);
+ break;
+ }
+ case arm_compute::DataType::F32:
{
CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output);
break;
}
- case DataType::QuantisedAsymm8:
+ case arm_compute::DataType::QASYMM8:
{
CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output);
break;
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
index 24640c7adb..6a87d62320 100644
--- a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
@@ -5,20 +5,21 @@
#pragma once
+#include <backends/NeonWorkloadUtils.hpp>
#include <backends/Workload.hpp>
namespace armnn
{
-// Base class template providing an implementation of the Merger layer common to all data types
-template <armnn::DataType DataType>
-class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataType>
+// Base class template providing an implementation of the Merger layer common to all data types.
+template <armnn::DataType... DataTypes>
+class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...>
{
public:
- using TypedWorkload<MergerQueueDescriptor, DataType>::TypedWorkload;
+ using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload;
virtual void Execute() const override
{
- // With subtensors, merger is a no-op
+ // With subtensors, merger is a no-op.
}
};
diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
index 769905b48b..769291c700 100644
--- a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
@@ -6,20 +6,21 @@
#pragma once
#include <backends/Workload.hpp>
+#include <backends/NeonWorkloadUtils.hpp>
namespace armnn
{
-// Base class template providing an implementation of the Splitter layer common to all data types
-template <armnn::DataType DataType>
-class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataType>
+// Base class template providing an implementation of the Splitter layer common to all data types.
+template <armnn::DataType... DataTypes>
+class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...>
{
public:
- using TypedWorkload<SplitterQueueDescriptor, DataType>::TypedWorkload;
+ using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload;
virtual void Execute() const override
{
- // With subtensors, splitter is a no-op
+ // With subtensors, splitter is a no-op.
}
};
diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp
index f107c8137f..ca5c8202cd 100644
--- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp
@@ -6,40 +6,91 @@
#include "NeonBatchNormalizationFloat32Workload.hpp"
#include "backends/CpuTensorHandle.hpp"
#include "backends/ArmComputeTensorUtils.hpp"
+#include "../../../../include/armnn/ArmNN.hpp"
namespace armnn
{
using namespace armcomputetensorutils;
+
+arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean);
+ const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var);
+ const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta);
+ const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma);
+
+ return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
+ &aclOutputInfo,
+ &aclMeanInfo,
+ &aclVarInfo,
+ &aclBetaInfo,
+ &aclGammaInfo,
+ descriptor.m_Eps);
+}
+
NeonBatchNormalizationFloat32Workload::NeonBatchNormalizationFloat32Workload(
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
- : Float32Workload<BatchNormalizationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloat32Workload", 1, 1);
arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo());
- BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo());
- BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo());
- BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo());
+ m_Mean = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
+
+ m_Variance = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo());
- m_Layer.configure(
- &input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, m_Data.m_Parameters.m_Eps);
+ m_Gamma = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo());
- InitialiseArmComputeTensorData(m_Mean, m_Data.m_Mean->GetConstTensor<float>());
- InitialiseArmComputeTensorData(m_Variance, m_Data.m_Variance->GetConstTensor<float>());
- InitialiseArmComputeTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor<float>());
- InitialiseArmComputeTensorData(m_Beta, m_Data.m_Beta->GetConstTensor<float>());
+ m_Beta = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
+
+ m_Layer.configure(&input,
+ &output,
+ m_Mean.get(),
+ m_Variance.get(),
+ m_Beta.get(),
+ m_Gamma.get(),
+ m_Data.m_Parameters.m_Eps);
+
+ InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean);
+ InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance);
+ InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma);
+ InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta);
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_Layer.prepare();
+ FreeUnusedTensors();
}
void NeonBatchNormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonBatchNormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloat32Workload_Execute");
m_Layer.run();
}
+void NeonBatchNormalizationFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_Mean);
+ FreeTensorIfUnused(m_Variance);
+ FreeTensorIfUnused(m_Gamma);
+ FreeTensorIfUnused(m_Beta);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp
index 2050d42859..5eb5601f26 100644
--- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp
@@ -10,7 +10,15 @@
namespace armnn
{
-class NeonBatchNormalizationFloat32Workload : public Float32Workload<BatchNormalizationQueueDescriptor>
+arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& mean,
+ const TensorInfo& var,
+ const TensorInfo& beta,
+ const TensorInfo& gamma,
+ const BatchNormalizationDescriptor& descriptor);
+
+class NeonBatchNormalizationFloat32Workload : public FloatWorkload<BatchNormalizationQueueDescriptor>
{
public:
NeonBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor,
@@ -20,10 +28,12 @@ public:
private:
mutable arm_compute::NEBatchNormalizationLayer m_Layer;
- arm_compute::Tensor m_Mean;
- arm_compute::Tensor m_Variance;
- arm_compute::Tensor m_Gamma;
- arm_compute::Tensor m_Beta;
+ std::unique_ptr<arm_compute::Tensor> m_Mean;
+ std::unique_ptr<arm_compute::Tensor> m_Variance;
+ std::unique_ptr<arm_compute::Tensor> m_Gamma;
+ std::unique_ptr<arm_compute::Tensor> m_Beta;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp
index 8b203fbf3a..4e5d570a8e 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonConstantFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloat32Workload_Execute");
NeonBaseConstantWorkload::Execute();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
index 4ea4dfe127..050954df24 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp
@@ -10,10 +10,10 @@
namespace armnn
{
-class NeonConstantFloat32Workload : public NeonBaseConstantWorkload<DataType::Float32>
+class NeonConstantFloat32Workload : public NeonBaseConstantWorkload<DataType::Float16, DataType::Float32>
{
public:
- using NeonBaseConstantWorkload<DataType::Float32>::NeonBaseConstantWorkload;
+ using NeonBaseConstantWorkload<DataType::Float16, DataType::Float32>::NeonBaseConstantWorkload;
virtual void Execute() const override;
};
diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
index f6dfaeb7a7..4061605bc1 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonConstantUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute");
NeonBaseConstantWorkload::Execute();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp
new file mode 100644
index 0000000000..84fc051f65
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp
@@ -0,0 +1,41 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvertFp16ToFp32Workload.hpp"
+#include "Half.hpp"
+#include "FloatingPointConverter.hpp"
+
+#include "backends/WorkloadUtils.hpp"
+
+namespace armnn
+{
+
+NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1);
+ GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
+}
+
+void NeonConvertFp16ToFp32Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute");
+
+ auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
+ {
+ auto input = reinterpret_cast<const Half*>(src);
+ auto output = reinterpret_cast<float*>(dst);
+ size_t numElements = size/2; // 2 bytes per fp16
+ armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output);
+ };
+
+ for (const auto& pair : m_TensorHandlePairs)
+ {
+ CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp
new file mode 100644
index 0000000000..136c0d8a76
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+#include "backends/NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>
+{
+public:
+ NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>;
+ std::vector<TensorHandlePair> m_TensorHandlePairs;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp
new file mode 100644
index 0000000000..61f30522a8
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonConvertFp32ToFp16Workload.hpp"
+
+#include "Half.hpp"
+#include "FloatingPointConverter.hpp"
+
+#include "Profiling.hpp"
+#include "backends/WorkloadUtils.hpp"
+
+namespace armnn
+{
+
+NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info)
+{
+ this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1);
+ GatherTensorHandlePairs(descriptor, m_TensorHandlePairs);
+}
+
+void NeonConvertFp32ToFp16Workload::Execute() const
+{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute");
+
+ auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
+ {
+ auto input = reinterpret_cast<const float*>(src);
+ auto output = reinterpret_cast<Half*>(dst);
+ size_t numElements = size/2; // 2 bytes per fp16
+ armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output);
+ };
+
+ for (const auto& pair : m_TensorHandlePairs)
+ {
+ CopyTensorContentsGeneric(pair.first, pair.second, convertFunc);
+ }
+}
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp
new file mode 100644
index 0000000000..f48c365c48
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+#include "backends/NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>
+{
+public:
+ NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+
+private:
+ using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>;
+ std::vector<TensorHandlePair> m_TensorHandlePairs;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
index 423f02bcb0..e76afb6cf7 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
@@ -9,6 +9,9 @@
#include "NeonConvolution2dBaseWorkload.hpp"
+#include "armnn/Types.hpp"
+#include "Half.hpp"
+
namespace armnn
{
@@ -41,28 +44,28 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
layerInfo);
}
-template<armnn::DataType dataType>
-NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : TypedWorkload<Convolution2dQueueDescriptor, dataType>(descriptor, info)
+template<armnn::DataType... dataTypes>
+NeonConvolution2dBaseWorkload<dataTypes...>::NeonConvolution2dBaseWorkload(
+ const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>(descriptor, info)
{
using arm_compute::NEDirectConvolutionLayer;
- using namespace armcomputetensorutils;
ValidateData();
- // todo: check tensor shapes match
+ // todo: check tensor shapes match.
arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- BuildArmComputeTensor(m_KernelTensor, m_Data.m_Weight->GetTensorInfo());
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo());
- arm_compute::Tensor* optionalBiasTensor = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBiasTensor = &m_BiasTensor;
+ m_BiasTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -81,8 +84,8 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
{
auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(memoryManager);
directConvolutionLayer->configure(&input,
- &m_KernelTensor,
- optionalBiasTensor,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
m_ConvolutionLayer.reset(directConvolutionLayer.release());
@@ -91,22 +94,50 @@ NeonConvolution2dBaseWorkload<dataType>::NeonConvolution2dBaseWorkload(const Con
{
auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
convolutionLayer->configure(&input,
- &m_KernelTensor,
- optionalBiasTensor,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
m_ConvolutionLayer.reset(convolutionLayer.release());
}
BOOST_ASSERT(m_ConvolutionLayer);
- using Type = ResolveType<dataType>;
+ armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType();
+
+ switch (dataType)
+ {
+ case DataType::Float16:
+ {
+ InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Half>());
+ break;
+ }
+ case DataType::Float32:
+ {
+ InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<float>());
+ break;
+ }
+ case DataType::QuantisedAsymm8:
+ {
+ InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>());
+ break;
+ }
+ default:
+ {
+ BOOST_ASSERT_MSG(false, "Unknown DataType.");
+ }
+ }
+}
- InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Type>());
+template<armnn::DataType... dataTypes>
+void NeonConvolution2dBaseWorkload<dataTypes...>::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
}
-// Generate known implementations for linker
-template class NeonConvolution2dBaseWorkload<DataType::Float32>;
-template class NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8>;
+// Generates known implementations for linker.
+template class NeonConvolution2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class NeonConvolution2dBaseWorkload<armnn::DataType::QuantisedAsymm8>;
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
index d28d50d819..524d2c90b6 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
@@ -25,11 +25,11 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
const TensorInfo& weights,
const TensorInfo& biases);
-template<armnn::DataType dataType>
-class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataType>
+template<armnn::DataType... dataTypes>
+class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>
{
public:
- using TypedWorkload<Convolution2dQueueDescriptor, dataType>::m_Data;
+ using TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>::m_Data;
NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
@@ -38,8 +38,11 @@ public:
protected:
std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
- arm_compute::Tensor m_KernelTensor;
- arm_compute::Tensor m_BiasTensor;
+
+ std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
index f20f2a4ac5..18ec6ca2e7 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp
@@ -18,13 +18,16 @@ NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolu
{
if (m_Data.m_Parameters.m_BiasEnabled)
{
- InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<float>());
+ InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias);
}
+
+ m_ConvolutionLayer->prepare();
+ FreeUnusedTensors();
}
void NeonConvolution2dFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloat32Workload_Execute");
m_ConvolutionLayer->run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
index 56b0848efa..0bb8d69d94 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp
@@ -15,7 +15,7 @@
namespace armnn
{
-class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload<DataType::Float32>
+class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload<DataType::Float16, DataType::Float32>
{
public:
NeonConvolution2dFloat32Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
index fb91f7b7b2..bb33e939ea 100644
--- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
@@ -14,14 +14,16 @@ NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution
{
if (m_Data.m_Parameters.m_BiasEnabled)
{
- InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
+ InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>());
}
-}
+ m_ConvolutionLayer->prepare();
+ FreeUnusedTensors();
+}
void NeonConvolution2dUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute");
m_ConvolutionLayer->run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
new file mode 100644
index 0000000000..58d6061537
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonDepthwiseConvolutionBaseWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases)
+{
+ const arm_compute::TensorInfo aclInputInfo =
+ armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo =
+ armcomputetensorutils::BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeightsInfo =
+ armcomputetensorutils::BuildArmComputeTensorInfo(weights);
+
+ arm_compute::TensorInfo aclBiasesInfo;
+ arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases);
+ optionalAclBiasesInfo = &aclBiasesInfo;
+ }
+
+ const arm_compute::PadStrideInfo aclPadStrideInfo =
+ armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor);
+ const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+ return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
+ &aclWeightsInfo,
+ optionalAclBiasesInfo,
+ &aclOutputInfo,
+ aclPadStrideInfo,
+ aclDepthMultiplier);
+}
+
+}
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
new file mode 100644
index 0000000000..0cead354f8
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const DepthwiseConvolution2dDescriptor& descriptor,
+ const TensorInfo& weights,
+ const TensorInfo& biases);
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
index 11e31c727a..f94cd903b6 100644
--- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp
@@ -16,23 +16,17 @@ using namespace armcomputetensorutils;
NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload(
const DepthwiseConvolution2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
+ : FloatWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
- std::string reasonIfUnsupported;
- if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
- {
- throw UnimplementedException(reasonIfUnsupported);
- }
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
- BuildArmComputeTensor(m_KernelTensor, weightInfo);
-
- arm_compute::Tensor* optionalBias = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBias = &m_BiasTensor;
+ m_BiasTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -54,8 +48,8 @@ NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
}
@@ -64,28 +58,37 @@ NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
}
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
- InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<float>());
+ InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight);
- if (optionalBias)
+ if (m_BiasTensor)
{
- InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<float>());
+ InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias);
}
+
+ m_pDepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
}
void NeonDepthwiseConvolutionFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "NeonDepthwiseConvolutionFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloat32Workload_Execute");
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
m_pDepthwiseConvolutionLayer->run();
}
+void NeonDepthwiseConvolutionFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
index f9e295f568..ece9f1877b 100644
--- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class NeonDepthwiseConvolutionFloat32Workload : public Float32Workload<DepthwiseConvolution2dQueueDescriptor>
+class NeonDepthwiseConvolutionFloat32Workload : public FloatWorkload<DepthwiseConvolution2dQueueDescriptor>
{
public:
NeonDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor,
@@ -20,8 +20,10 @@ public:
private:
mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
- arm_compute::Tensor m_KernelTensor;
- arm_compute::Tensor m_BiasTensor;
+ std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
index bd034c4f80..45fbcb37ab 100644
--- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
@@ -20,19 +20,13 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
{
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
- std::string reasonIfUnsupported;
- if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo))
- {
- throw UnimplementedException(reasonIfUnsupported);
- }
+ m_KernelTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_KernelTensor, weightInfo);
- BuildArmComputeTensor(m_KernelTensor, weightInfo);
-
- arm_compute::Tensor* optionalBias = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBias = &m_BiasTensor;
+ m_BiasTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo());
}
arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX,
@@ -54,8 +48,8 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
}
@@ -64,28 +58,37 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload(
m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
m_pDepthwiseConvolutionLayer.get())->configure(&input,
- &m_KernelTensor,
- optionalBias,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
&output,
padStrideInfo);
}
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
- InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
+ InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
- if (optionalBias)
+ if (m_BiasTensor)
{
- InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor<int32_t>());
+ InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>());
}
+
+ m_pDepthwiseConvolutionLayer->prepare();
+ FreeUnusedTensors();
}
void NeonDepthwiseConvolutionUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "NeonDepthwiseConvolutionUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute");
BOOST_ASSERT(m_pDepthwiseConvolutionLayer);
m_pDepthwiseConvolutionLayer->run();
}
+void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_KernelTensor);
+ FreeTensorIfUnused(m_BiasTensor);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
index 9cf272e9f5..aca0ba5337 100644
--- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
@@ -20,8 +20,10 @@ public:
private:
mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer;
- arm_compute::Tensor m_KernelTensor;
- arm_compute::Tensor m_BiasTensor;
+ std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
+ std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
index a5eec5cadb..c43cfa9c46 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp
@@ -9,7 +9,7 @@ namespace armnn
{
NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<FloorQueueDescriptor>(descriptor, info)
+ : FloatWorkload<FloorQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonFloorFloat32Workload", 1, 1);
@@ -21,7 +21,7 @@ NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& d
void NeonFloorFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFloorFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloat32Workload_Execute");
m_Layer.run();
}
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
index f876f1e1bb..56680f1e39 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class NeonFloorFloat32Workload : public Float32Workload<FloorQueueDescriptor>
+class NeonFloorFloat32Workload : public FloatWorkload<FloorQueueDescriptor>
{
public:
NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
index e1c4448642..c3af41e20d 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp
@@ -4,16 +4,47 @@
//
#include "NeonFullyConnectedFloat32Workload.hpp"
-#include "backends/CpuTensorHandle.hpp"
+
#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
namespace armnn
{
using namespace armcomputetensorutils;
+arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+ const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+
+ arm_compute::TensorInfo aclBiases;
+ arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ if (descriptor.m_BiasEnabled)
+ {
+ aclBiases = BuildArmComputeTensorInfo(biases);
+ optionalAclBiases = &aclBiases;
+ }
+
+ const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
+
+
+ return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
+ &aclWeights,
+ optionalAclBiases,
+ &aclOutput,
+ fullyConnectedLayerInfo);
+}
+
NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor,
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<FullyConnectedQueueDescriptor>(descriptor, info)
+ : FloatWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
, m_FullyConnectedLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonFullyConnectedFloat32Workload", 1, 1);
@@ -21,33 +52,45 @@ NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const Fully
arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+ m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
- arm_compute::Tensor* optionalBiasTensor = nullptr;
if (m_Data.m_Parameters.m_BiasEnabled)
{
- BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
- optionalBiasTensor = &m_BiasesTensor;
+ m_BiasesTensor = std::make_unique<arm_compute::Tensor>();
+ BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
}
// Construct
- m_FullyConnectedLayer.configure(
- &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix);
+ arm_compute::FullyConnectedLayerInfo fc_info;
+ fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+ m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
// Allocate
- InitialiseArmComputeTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor<float>());
+ InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
- if (optionalBiasTensor)
+ if (m_BiasesTensor)
{
- InitialiseArmComputeTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor<float>());
+ InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias);
}
+
+ // Force Compute Library to perform the necessary copying and reshaping, after which
+ // delete all the input tensors that will no longer be needed
+ m_FullyConnectedLayer.prepare();
+ FreeUnusedTensors();
}
void NeonFullyConnectedFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFullyConnectedFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloat32Workload_Execute");
m_FullyConnectedLayer.run();
}
+void NeonFullyConnectedFloat32Workload::FreeUnusedTensors()
+{
+ FreeTensorIfUnused(m_WeightsTensor);
+ FreeTensorIfUnused(m_BiasesTensor);
+}
+
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
index 9c722dc573..684b5e0753 100644
--- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp
@@ -14,7 +14,13 @@
namespace armnn
{
-class NeonFullyConnectedFloat32Workload : public Float32Workload<FullyConnectedQueueDescriptor>
+arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const TensorInfo& weights,
+ const TensorInfo& biases,
+ const FullyConnectedDescriptor& descriptor);
+
+class NeonFullyConnectedFloat32Workload : public FloatWorkload<FullyConnectedQueueDescriptor>
{
public:
NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info,
@@ -23,8 +29,11 @@ public:
private:
mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer;
- arm_compute::Tensor m_WeightsTensor;
- arm_compute::Tensor m_BiasesTensor;
+
+ std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
+ std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
+
+ void FreeUnusedTensors();
};
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
index 9f79fa09de..a3ae33f41f 100644
--- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp
@@ -9,9 +9,21 @@
namespace armnn
{
+arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ arm_compute::NormalizationLayerInfo normalizationInfo =
+ CreateAclNormalizationLayerInfoForL2Normalization(input);
+
+ return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor,
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<L2NormalizationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
, m_Layer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonL2NormalizationFloat32Workload", 1, 1);
@@ -23,7 +35,7 @@ NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2N
void NeonL2NormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonL2NormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloat32Workload_Execute");
m_Layer.run();
}
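NeonL2NormalizationWorkloadValidate() follows the same convention as the other validate functions added in this change: it returns an arm_compute::Status that a layer-support query can turn into a bool plus a reason string. A hedged sketch of such a caller; the helper name and plumbing are illustrative, not the actual ArmNN layer-support code:

#include <string>
#include <utility>

#include <arm_compute/core/Error.h>

template <typename ValidateFunc, typename... Args>
bool IsWorkloadSupportedSketch(ValidateFunc&& validate, std::string* reasonIfUnsupported, Args&&... args)
{
    const arm_compute::Status status = validate(std::forward<Args>(args)...);
    const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported != nullptr)
    {
        // Surface Compute Library's error message as the unsupported reason.
        *reasonIfUnsupported = status.error_description();
    }
    return supported;
}

// e.g. IsWorkloadSupportedSketch(NeonL2NormalizationWorkloadValidate, &reason, inputInfo, outputInfo);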
diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
index 2b4a1fef37..c3fcde5a57 100644
--- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp
@@ -14,7 +14,10 @@
namespace armnn
{
-class NeonL2NormalizationFloat32Workload : public Float32Workload<L2NormalizationQueueDescriptor>
+arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output);
+
+class NeonL2NormalizationFloat32Workload : public FloatWorkload<L2NormalizationQueueDescriptor>
{
public:
NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp
new file mode 100644
index 0000000000..ba1369e179
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonLstmFloat32Workload.hpp"
+
+namespace armnn
+{
+NeonLstmFloat32Workload::NeonLstmFloat32Workload(const LstmQueueDescriptor& descriptor,
+ const WorkloadInfo& info)
+ : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
+{
+ m_Data.ValidateInputsOutputs("NeonLstmFloat32Workload", 1, 1);
+}
+
+void NeonLstmFloat32Workload::Execute() const
+{
+ throw armnn::Exception("No implementation of Lstm in the Neon backend!");
+}
+
+} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp
new file mode 100644
index 0000000000..78ee1da341
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonLstmFloat32Workload : public FloatWorkload<LstmQueueDescriptor>
+{
+public:
+ NeonLstmFloat32Workload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+ virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
index 7520e8768e..30dd283620 100644
--- a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonMergerFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClMergerFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloat32Workload_Execute");
NeonBaseMergerWorkload::Execute();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
index 5c889c2af0..7b8ee9881f 100644
--- a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp
@@ -10,10 +10,10 @@
namespace armnn
{
-class NeonMergerFloat32Workload : public NeonBaseMergerWorkload<DataType::Float32>
+class NeonMergerFloat32Workload : public NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>
{
public:
- using NeonBaseMergerWorkload<DataType::Float32>::NeonBaseMergerWorkload;
+ using NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>::NeonBaseMergerWorkload;
virtual void Execute() const override;
};
diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
index 51578e5bff..caccdd443a 100644
--- a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonMergerUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClMergerUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute");
NeonBaseMergerWorkload::Execute();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
index 58ce7b74ba..a8a3cd77b4 100644
--- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp
@@ -9,9 +9,28 @@
namespace armnn
{
+arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output)
+{
+ const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+ const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ // At the time of writing, configure() fails if a rounding policy other than TO_ZERO is supplied
+ // together with a scale of 1.0 for F32 tensors, even though the rounding policy appears to be
+ // ignored for F32 tensors.
+ return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
+ &aclInput2,
+ &aclOutput,
+ 1.0f,
+ arm_compute::ConvertPolicy::SATURATE,
+ arm_compute::RoundingPolicy::TO_ZERO);
+}
+
NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<MultiplicationQueueDescriptor>(descriptor, info)
+ : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonMultiplicationFloat32Workload", 2, 1);
@@ -32,7 +51,7 @@ NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const Multi
void NeonMultiplicationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMultiplicationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloat32Workload_Execute");
m_PixelWiseMultiplication.run();
}
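The comment in NeonMultiplicationWorkloadValidate() explains why a scale of 1.0f and RoundingPolicy::TO_ZERO are the only safe arguments here. The constructor (whose body falls outside this hunk) has to pass matching arguments to configure(); a hedged sketch of that call, where the tensor references stand in for the workload's real input/output handles:

#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>

void ConfigureMultiplicationSketch(arm_compute::ITensor& input0,
                                   arm_compute::ITensor& input1,
                                   arm_compute::ITensor& output,
                                   arm_compute::NEPixelWiseMultiplication& layer)
{
    // Mirror the validate() arguments: scale 1.0 for F32, saturating conversion,
    // and TO_ZERO rounding, the only policy configure() accepts in this case.
    layer.configure(&input0,
                    &input1,
                    &output,
                    1.0f,
                    arm_compute::ConvertPolicy::SATURATE,
                    arm_compute::RoundingPolicy::TO_ZERO);
}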
diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp
index ed5ead3700..62e84a2e07 100644
--- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp
@@ -9,8 +9,11 @@
namespace armnn
{
+arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
+ const TensorInfo& input1,
+ const TensorInfo& output);
-class NeonMultiplicationFloat32Workload : public Float32Workload<MultiplicationQueueDescriptor>
+class NeonMultiplicationFloat32Workload : public FloatWorkload<MultiplicationQueueDescriptor>
{
public:
NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
index 0fd0dcc420..20936a2760 100644
--- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp
@@ -6,13 +6,28 @@
#include "NeonNormalizationFloat32Workload.hpp"
#include "backends/NeonLayerSupport.hpp"
#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
namespace armnn
{
+arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const NormalizationDescriptor& descriptor)
+{
+ const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ arm_compute::NormalizationLayerInfo normalizationInfo =
+ armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor);
+
+ return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<NormalizationQueueDescriptor>(descriptor, info)
+ const WorkloadInfo& info,
+ std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
, m_NormalizationLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonNormalizationFloat32Workload", 1, 1);
@@ -22,7 +37,7 @@ NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const Normali
throw UnimplementedException(reasonIfUnsupported);
}
- // input and output tensors have to have the same dimensionality
+ // Input and output tensors have to have the same dimensionality.
if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1]
|| info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0]
|| info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3]
@@ -48,7 +63,7 @@ NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const Normali
void NeonNormalizationFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonNormalizationFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloat32Workload_Execute");
m_NormalizationLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
index 24b6da8528..8f0823454b 100644
--- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp
@@ -12,7 +12,11 @@
namespace armnn
{
-class NeonNormalizationFloat32Workload : public Float32Workload<NormalizationQueueDescriptor>
+arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const NormalizationDescriptor& descriptor);
+
+class NeonNormalizationFloat32Workload : public FloatWorkload<NormalizationQueueDescriptor>
{
public:
NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp
index e0a0457422..c27797ee4e 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp
@@ -24,10 +24,10 @@ arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input,
armcomputetensorutils::BuildArmComputePermutationVector(mappings));
}
-template <armnn::DataType DataType>
-NeonPermuteWorkload<DataType>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+template <armnn::DataType... DataTypes>
+NeonPermuteWorkload<DataTypes...>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+ : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info)
{
using armcomputetensorutils::BuildArmComputePermutationVector;
@@ -37,18 +37,18 @@ NeonPermuteWorkload<DataType>::NeonPermuteWorkload(const PermuteQueueDescriptor&
arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
- // Run the layer
+ // Configure the permute function.
m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
}
-template <armnn::DataType DataType>
-void NeonPermuteWorkload<DataType>::Execute() const
+template <armnn::DataType... DataTypes>
+void NeonPermuteWorkload<DataTypes...>::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
m_PermuteFunction.run();
}
-template class NeonPermuteWorkload<DataType::Float32>;
+template class NeonPermuteWorkload<DataType::Float16, DataType::Float32>;
template class NeonPermuteWorkload<DataType::QuantisedAsymm8>;
} // namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp
index 56e8719d6c..06b2dc692b 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp
@@ -7,6 +7,7 @@
#include "backends/Workload.hpp"
#include "backends/WorkloadData.hpp"
+#include "backends/NeonWorkloadUtils.hpp"
#include <armnn/TypesUtils.hpp>
#include <arm_compute/runtime/NEON/functions/NEPermute.h>
@@ -18,13 +19,13 @@ namespace armnn
arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
const PermuteDescriptor& descriptor);
-template <armnn::DataType DataType>
-class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+template <armnn::DataType... DataTypes>
+class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...>
{
public:
static const std::string& GetName()
{
- static const std::string name = std::string("NeonPermute") + GetDataTypeName(DataType) + "Workload";
+ static const std::string name = std::string("NeonPermuteWorkload");
return name;
}
@@ -32,11 +33,11 @@ public:
void Execute() const override;
private:
- using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+ using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data;
mutable arm_compute::NEPermute m_PermuteFunction;
};
-using NeonPermuteFloat32Workload = NeonPermuteWorkload<DataType::Float32>;
+using NeonPermuteFloatWorkload = NeonPermuteWorkload<DataType::Float16, DataType::Float32>;
using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>;
-} //namespace armnn
+} // namespace armnn
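NeonPermuteWorkload (and the other Neon workloads in this change) now take a variadic DataTypes... pack, so one workload class can be instantiated for both Float16 and Float32. A minimal, self-contained sketch of the idea behind checking a runtime data type against such a pack; the enum and helper are illustrative, not the actual TypedWorkload implementation:

#include <initializer_list>

namespace sketch
{
enum class DataType { Float16, Float32, QuantisedAsymm8 };

// Returns true if 'actual' matches any data type in the template pack.
template <DataType... SupportedTypes>
bool IsOneOf(DataType actual)
{
    for (DataType supported : { SupportedTypes... })
    {
        if (supported == actual)
        {
            return true;
        }
    }
    return false;
}
} // namespace sketch

// e.g. sketch::IsOneOf<sketch::DataType::Float16, sketch::DataType::Float32>(sketch::DataType::Float32)
// is true, mirroring how NeonPermuteFloatWorkload now accepts both float data types.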
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
index 6d6a492155..3585d36ba3 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
@@ -25,10 +25,10 @@ arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
}
-template <armnn::DataType dataType>
-NeonPooling2dBaseWorkload<dataType>::NeonPooling2dBaseWorkload(
+template <armnn::DataType... dataTypes>
+NeonPooling2dBaseWorkload<dataTypes...>::NeonPooling2dBaseWorkload(
const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
- : TypedWorkload<Pooling2dQueueDescriptor, dataType>(descriptor, info)
+ : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info)
{
m_Data.ValidateInputsOutputs(name, 1, 1);
@@ -40,7 +40,7 @@ NeonPooling2dBaseWorkload<dataType>::NeonPooling2dBaseWorkload(
m_PoolingLayer.configure(&input, &output, layerInfo);
}
-template class NeonPooling2dBaseWorkload<DataType::Float32>;
+template class NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>;
template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
index 9461982f86..2e85e937fa 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
@@ -14,12 +14,12 @@ arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const Pooling2dDescriptor& descriptor);
-// Base class template providing an implementation of the Pooling2d layer common to all data types
-template <armnn::DataType dataType>
-class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataType>
+// Base class template providing an implementation of the Pooling2d layer common to all data types.
+template <armnn::DataType... dataTypes>
+class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>
{
public:
- using TypedWorkload<Pooling2dQueueDescriptor, dataType>::m_Data;
+ using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data;
NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
const std::string& name);
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp
index ba2aa20924..cb690c51b8 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp
@@ -12,13 +12,14 @@ namespace armnn
NeonPooling2dFloat32Workload::NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : NeonPooling2dBaseWorkload<armnn::DataType::Float32>(descriptor, info, "NeonPooling2dFloat32Workload")
+ : NeonPooling2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>(descriptor, info,
+ "NeonPooling2dFloat32Workload")
{
}
void NeonPooling2dFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloat32Workload_Execute");
m_PoolingLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp
index 6cfc9cc96f..36c4e7edf1 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp
@@ -11,7 +11,8 @@
namespace armnn
{
-class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload<armnn::DataType::Float32>
+class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload<armnn::DataType::Float16,
+ armnn::DataType::Float32>
{
public:
NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
index 0778794081..3e06d08dea 100644
--- a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
@@ -18,7 +18,7 @@ NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescr
void NeonPooling2dUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute");
m_PoolingLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp
index 317d16f6bd..93f6eb8ef5 100644
--- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp
@@ -12,7 +12,7 @@ namespace armnn
NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : Float32Workload<ReshapeQueueDescriptor>(descriptor, info)
+ : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonReshapeFloat32Workload", 1, 1);
@@ -24,7 +24,7 @@ NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescrip
void NeonReshapeFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloat32Workload_Execute");
m_Layer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
index 27f4aea9e7..3e5cca1b9e 100644
--- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp
@@ -10,7 +10,7 @@
namespace armnn
{
-class NeonReshapeFloat32Workload : public Float32Workload<ReshapeQueueDescriptor>
+class NeonReshapeFloat32Workload : public FloatWorkload<ReshapeQueueDescriptor>
{
public:
NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
index 06f57c1e0f..b31bdcd3d0 100644
--- a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
@@ -24,7 +24,7 @@ NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor&
void NeonReshapeUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute");
m_Layer.run();
}
} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
new file mode 100644
index 0000000000..3efffafe25
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#include "NeonSoftmaxBaseWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const SoftmaxDescriptor& descriptor)
+{
+ // NOTE: We report 4D Softmax as unsupported until full support is added to ACL
+ if (input.GetShape().GetNumDimensions() >= 4u)
+ {
+ return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported");
+ }
+
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta);
+}
+
+} //namespace armnn
+
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
new file mode 100644
index 0000000000..b9b21fb254
--- /dev/null
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// See LICENSE file in the project root for full license information.
+//
+
+#pragma once
+
+#include "backends/NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input,
+ const TensorInfo& output,
+ const SoftmaxDescriptor& descriptor);
+
+} //namespace armnn
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
index 5e2925ca02..027b508ad5 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp
@@ -10,12 +10,12 @@ namespace armnn
NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor,
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
- : Float32Workload<SoftmaxQueueDescriptor>(descriptor, info)
+ : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
, m_SoftmaxLayer(memoryManager)
{
m_Data.ValidateInputsOutputs("NeonSoftmaxFloat32Workload", 1, 1);
- // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions
+ // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -24,7 +24,7 @@ NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescrip
void NeonSoftmaxFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloat32Workload_Execute");
m_SoftmaxLayer.run();
}
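The constructor comment above notes that the Compute Library softmax operates on 2D input/output tensors, so higher-rank shapes are viewed as [outer, inner] with the leading dimensions folded together. A purely illustrative sketch of that collapse for a 4D shape; the workload itself leaves the actual reshaping to Compute Library:

#include <array>
#include <cstdint>

// Fold the three leading dimensions into a single "outer" dimension,
// leaving the last dimension as the axis the 2D softmax runs over.
std::array<uint32_t, 2> FlattenTo2dSketch(const std::array<uint32_t, 4>& shape)
{
    const uint32_t outer = shape[0] * shape[1] * shape[2];
    const uint32_t inner = shape[3];
    return { outer, inner };
}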
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
index 91d25b47f8..3656a26a3c 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp
@@ -14,7 +14,7 @@
namespace armnn
{
-class NeonSoftmaxFloat32Workload : public Float32Workload<SoftmaxQueueDescriptor>
+class NeonSoftmaxFloat32Workload : public FloatWorkload<SoftmaxQueueDescriptor>
{
public:
NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
index eb4a23c13c..4b0c05b25b 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
@@ -32,7 +32,7 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor&
void NeonSoftmaxUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClSoftmaxUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute");
m_SoftmaxLayer.run();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp
index 13701d2ed3..996fc15adb 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonSplitterFloat32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterFloat32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloat32Workload_Execute");
NeonBaseSplitterWorkload::Execute();
}
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp
index 432f5de4eb..9f6dc75499 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp
@@ -10,10 +10,10 @@
namespace armnn
{
-class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload<DataType::Float32>
+class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>
{
public:
- using NeonBaseSplitterWorkload<DataType::Float32>::NeonBaseSplitterWorkload;
+ using NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>::NeonBaseSplitterWorkload;
virtual void Execute() const override;
};
diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
index 90d24d3ffd..0d6328ff7e 100644
--- a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
+++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
@@ -10,7 +10,7 @@ namespace armnn
void NeonSplitterUint8Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterUint8Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute");
NeonBaseSplitterWorkload::Execute();
}