From c577f2c6a3b4ddb6ba87a882723c53a248afbeba Mon Sep 17 00:00:00 2001
From: telsoa01
Date: Fri, 31 Aug 2018 09:22:23 +0100
Subject: Release 18.08

---
 .../NeonActivationFloat32Workload.cpp | 27 +++++++-
 .../NeonActivationFloat32Workload.hpp | 7 +-
 .../NeonWorkloads/NeonActivationUint8Workload.cpp | 13 +---
 .../NeonWorkloads/NeonAdditionFloat32Workload.cpp | 20 +++++-
 .../NeonWorkloads/NeonAdditionFloat32Workload.hpp | 7 +-
 .../NeonWorkloads/NeonBaseConstantWorkload.hpp | 25 ++++++--
 .../NeonWorkloads/NeonBaseMergerWorkload.hpp | 11 ++--
 .../NeonWorkloads/NeonBaseSplitterWorkload.hpp | 11 ++--
 .../NeonBatchNormalizationFloat32Workload.cpp | 75 ++++++++++++++++++----
 .../NeonBatchNormalizationFloat32Workload.hpp | 20 ++++--
 .../NeonWorkloads/NeonConstantFloat32Workload.cpp | 2 +-
 .../NeonWorkloads/NeonConstantFloat32Workload.hpp | 4 +-
 .../NeonWorkloads/NeonConstantUint8Workload.cpp | 2 +-
 .../NeonConvertFp16ToFp32Workload.cpp | 41 ++++++++++++
 .../NeonConvertFp16ToFp32Workload.hpp | 26 ++++++++
 .../NeonConvertFp32ToFp16Workload.cpp | 43 +++++++++++++
 .../NeonConvertFp32ToFp16Workload.hpp | 26 ++++++++
 .../NeonConvolution2dBaseWorkload.cpp | 69 ++++++++++++++------
 .../NeonConvolution2dBaseWorkload.hpp | 13 ++--
 .../NeonConvolution2dFloat32Workload.cpp | 7 +-
 .../NeonConvolution2dFloat32Workload.hpp | 2 +-
 .../NeonConvolution2dUint8Workload.cpp | 8 ++-
 .../NeonDepthwiseConvolutionBaseWorkload.cpp | 46 +++++++++++++
 .../NeonDepthwiseConvolutionBaseWorkload.hpp | 19 ++++++
 .../NeonDepthwiseConvolutionFloat32Workload.cpp | 41 ++++++------
 .../NeonDepthwiseConvolutionFloat32Workload.hpp | 8 ++-
 .../NeonDepthwiseConvolutionUint8Workload.cpp | 39 +++++------
 .../NeonDepthwiseConvolutionUint8Workload.hpp | 6 +-
 .../NeonWorkloads/NeonFloorFloat32Workload.cpp | 4 +-
 .../NeonWorkloads/NeonFloorFloat32Workload.hpp | 2 +-
 .../NeonFullyConnectedFloat32Workload.cpp | 67 +++++++++++++++----
 .../NeonFullyConnectedFloat32Workload.hpp | 15 ++++-
 .../NeonL2NormalizationFloat32Workload.cpp | 16 ++++-
 .../NeonL2NormalizationFloat32Workload.hpp | 5 +-
 .../NeonWorkloads/NeonLstmFloat32Workload.cpp | 22 +++++++
 .../NeonWorkloads/NeonLstmFloat32Workload.hpp | 20 ++++++
 .../NeonWorkloads/NeonMergerFloat32Workload.cpp | 2 +-
 .../NeonWorkloads/NeonMergerFloat32Workload.hpp | 4 +-
 .../NeonWorkloads/NeonMergerUint8Workload.cpp | 2 +-
 .../NeonMultiplicationFloat32Workload.cpp | 23 ++++++-
 .../NeonMultiplicationFloat32Workload.hpp | 5 +-
 .../NeonNormalizationFloat32Workload.cpp | 23 +++++--
 .../NeonNormalizationFloat32Workload.hpp | 6 +-
 .../backends/NeonWorkloads/NeonPermuteWorkload.cpp | 16 ++---
 .../backends/NeonWorkloads/NeonPermuteWorkload.hpp | 13 ++--
 .../NeonWorkloads/NeonPooling2dBaseWorkload.cpp | 8 +--
 .../NeonWorkloads/NeonPooling2dBaseWorkload.hpp | 8 +--
 .../NeonWorkloads/NeonPooling2dFloat32Workload.cpp | 5 +-
 .../NeonWorkloads/NeonPooling2dFloat32Workload.hpp | 3 +-
 .../NeonWorkloads/NeonPooling2dUint8Workload.cpp | 2 +-
 .../NeonWorkloads/NeonReshapeFloat32Workload.cpp | 4 +-
 .../NeonWorkloads/NeonReshapeFloat32Workload.hpp | 2 +-
 .../NeonWorkloads/NeonReshapeUint8Workload.cpp | 2 +-
 .../NeonWorkloads/NeonSoftmaxBaseWorkload.cpp | 30 +++++++
 .../NeonWorkloads/NeonSoftmaxBaseWorkload.hpp | 17 +++++
 .../NeonWorkloads/NeonSoftmaxFloat32Workload.cpp | 6 +-
 .../NeonWorkloads/NeonSoftmaxFloat32Workload.hpp | 2 +-
 .../NeonWorkloads/NeonSoftmaxUint8Workload.cpp | 2 +-
 .../NeonWorkloads/NeonSplitterFloat32Workload.cpp | 2 +-
 .../NeonWorkloads/NeonSplitterFloat32Workload.hpp | 4 +-
.../NeonWorkloads/NeonSplitterUint8Workload.cpp | 2 +- 61 files changed, 764 insertions(+), 198 deletions(-) create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp create mode 100644 src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp (limited to 'src/armnn/backends/NeonWorkloads') diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp index 39e55d5761..711bfceeaf 100644 --- a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.cpp @@ -9,9 +9,32 @@ namespace armnn { + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "Neon: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::NEActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonActivationFloat32Workload", 1, 1); @@ -26,7 +49,7 @@ NeonActivationFloat32Workload::NeonActivationFloat32Workload(const ActivationQue void NeonActivationFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloat32Workload_Execute"); m_ActivationLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp index 6fa83ea2f6..0d26b3b39f 100644 --- a/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonActivationFloat32Workload.hpp @@ -9,7 +9,12 @@ namespace armnn { -class NeonActivationFloat32Workload : public Float32Workload + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +class NeonActivationFloat32Workload : public FloatWorkload { public: 
NeonActivationFloat32Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp index 27c37e9425..f2e42338b2 100644 --- a/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonActivationUint8Workload.cpp @@ -13,15 +13,8 @@ NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDe const WorkloadInfo& info) : Uint8Workload(descriptor, info) { - - std::string reasonIfUnsupported; - if (!IsNeonActivationUint8Supported(&reasonIfUnsupported, m_Data.m_Parameters)) - { - throw InvalidArgumentException(reasonIfUnsupported); - } - - // Only BoundedReLu is supported (see IsNeonActivationUint8Supported) - arm_compute::ActivationLayerInfo layerInfo(arm_compute::ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, m_Data.m_Parameters.m_A, m_Data.m_Parameters.m_B); @@ -35,7 +28,7 @@ NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDe void NeonActivationUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonActivationUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); m_ActivationLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp index d1fb64093d..f26e42aff9 100644 --- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.cpp @@ -4,14 +4,30 @@ // #include "NeonAdditionFloat32Workload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" #include "backends/CpuTensorHandle.hpp" namespace armnn { +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticAddition::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + + NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonAdditionFloat32Workload", 2, 1); @@ -24,7 +40,7 @@ NeonAdditionFloat32Workload::NeonAdditionFloat32Workload(const AdditionQueueDesc void NeonAdditionFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonAdditionFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloat32Workload_Execute"); m_AddLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp index 5b75b502a3..dae66bb69d 100644 --- a/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonAdditionFloat32Workload.hpp @@ -9,7 +9,12 @@ namespace armnn { 
-class NeonAdditionFloat32Workload : public Float32Workload + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonAdditionFloat32Workload : public FloatWorkload { public: NeonAdditionFloat32Workload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp index 247ebfc5dd..e0ad408424 100644 --- a/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp @@ -5,23 +5,27 @@ #pragma once +#include #include #include #include +#include #include +#include #include +#include "Half.hpp" namespace armnn { -// Base class template providing an implementation of the Constant layer common to all data types -template -class NeonBaseConstantWorkload : public TypedWorkload +// Base class template providing an implementation of the Constant layer common to all data types. +template +class NeonBaseConstantWorkload : public TypedWorkload { public: NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload(descriptor, info) + : TypedWorkload(descriptor, info) , m_RanOnce(false) { } @@ -41,15 +45,22 @@ public: BOOST_ASSERT(data.m_LayerOutput != nullptr); arm_compute::ITensor& output = boost::polymorphic_downcast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = + boost::polymorphic_downcast(data.m_Outputs[0])->GetDataType(); - switch (DataFormat) + switch (computeDataType) { - case DataType::Float32: + case arm_compute::DataType::F16: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::F32: { CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); break; } - case DataType::QuantisedAsymm8: + case arm_compute::DataType::QASYMM8: { CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); break; diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp index 24640c7adb..6a87d62320 100644 --- a/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp @@ -5,20 +5,21 @@ #pragma once +#include #include namespace armnn { -// Base class template providing an implementation of the Merger layer common to all data types -template -class NeonBaseMergerWorkload : public TypedWorkload +// Base class template providing an implementation of the Merger layer common to all data types. +template +class NeonBaseMergerWorkload : public TypedWorkload { public: - using TypedWorkload::TypedWorkload; + using TypedWorkload::TypedWorkload; virtual void Execute() const override { - // With subtensors, merger is a no-op + // With subtensors, merger is a no-op. 
} }; diff --git a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp index 769905b48b..769291c700 100644 --- a/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp @@ -6,20 +6,21 @@ #pragma once #include +#include namespace armnn { -// Base class template providing an implementation of the Splitter layer common to all data types -template -class NeonBaseSplitterWorkload : public TypedWorkload +// Base class template providing an implementation of the Splitter layer common to all data types. +template +class NeonBaseSplitterWorkload : public TypedWorkload { public: - using TypedWorkload::TypedWorkload; + using TypedWorkload::TypedWorkload; virtual void Execute() const override { - // With subtensors, splitter is a no-op + // With subtensors, splitter is a no-op. } }; diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp index f107c8137f..ca5c8202cd 100644 --- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.cpp @@ -6,40 +6,91 @@ #include "NeonBatchNormalizationFloat32Workload.hpp" #include "backends/CpuTensorHandle.hpp" #include "backends/ArmComputeTensorUtils.hpp" +#include "../../../../include/armnn/ArmNN.hpp" namespace armnn { using namespace armcomputetensorutils; + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + descriptor.m_Eps); +} + NeonBatchNormalizationFloat32Workload::NeonBatchNormalizationFloat32Workload( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloat32Workload", 1, 1); arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); - BuildArmComputeTensor(m_Mean, m_Data.m_Mean->GetTensorInfo()); - BuildArmComputeTensor(m_Variance, m_Data.m_Variance->GetTensorInfo()); - BuildArmComputeTensor(m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - BuildArmComputeTensor(m_Beta, m_Data.m_Beta->GetTensorInfo()); + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); - m_Layer.configure( - &input, &output, &m_Mean, &m_Variance, &m_Beta, &m_Gamma, 
m_Data.m_Parameters.m_Eps); + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); - InitialiseArmComputeTensorData(m_Mean, m_Data.m_Mean->GetConstTensor()); - InitialiseArmComputeTensorData(m_Variance, m_Data.m_Variance->GetConstTensor()); - InitialiseArmComputeTensorData(m_Gamma, m_Data.m_Gamma->GetConstTensor()); - InitialiseArmComputeTensorData(m_Beta, m_Data.m_Beta->GetConstTensor()); + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); } void NeonBatchNormalizationFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonBatchNormalizationFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloat32Workload_Execute"); m_Layer.run(); } +void NeonBatchNormalizationFloat32Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp index 2050d42859..5eb5601f26 100644 --- a/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp @@ -10,7 +10,15 @@ namespace armnn { -class NeonBatchNormalizationFloat32Workload : public Float32Workload +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor); + +class NeonBatchNormalizationFloat32Workload : public FloatWorkload { public: NeonBatchNormalizationFloat32Workload(const BatchNormalizationQueueDescriptor& descriptor, @@ -20,10 +28,12 @@ public: private: mutable arm_compute::NEBatchNormalizationLayer m_Layer; - arm_compute::Tensor m_Mean; - arm_compute::Tensor m_Variance; - arm_compute::Tensor m_Gamma; - arm_compute::Tensor m_Beta; + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); }; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp index 8b203fbf3a..4e5d570a8e 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonConstantFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloat32Workload_Execute"); NeonBaseConstantWorkload::Execute(); } diff --git 
a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp index 4ea4dfe127..050954df24 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonConstantFloat32Workload.hpp @@ -10,10 +10,10 @@ namespace armnn { -class NeonConstantFloat32Workload : public NeonBaseConstantWorkload +class NeonConstantFloat32Workload : public NeonBaseConstantWorkload { public: - using NeonBaseConstantWorkload::NeonBaseConstantWorkload; + using NeonBaseConstantWorkload::NeonBaseConstantWorkload; virtual void Execute() const override; }; diff --git a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp index f6dfaeb7a7..4061605bc1 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonConstantUint8Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonConstantUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConstantUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); NeonBaseConstantWorkload::Execute(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..84fc051f65 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..136c0d8a76 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..61f30522a8 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "Profiling.hpp" +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32ToFloat16Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..f48c365c48 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp index 423f02bcb0..e76afb6cf7 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp @@ -9,6 +9,9 @@ #include "NeonConvolution2dBaseWorkload.hpp" +#include "armnn/Types.hpp" +#include "Half.hpp" + namespace armnn { @@ -41,28 +44,28 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, layerInfo); } -template -NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : TypedWorkload(descriptor, info) +template +NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : TypedWorkload(descriptor, info) { using arm_compute::NEDirectConvolutionLayer; - using namespace armcomputetensorutils; ValidateData(); - // todo: check tensor shapes match + // todo: check tensor shapes match. arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); - BuildArmComputeTensor(m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); - arm_compute::Tensor* optionalBiasTensor = nullptr; if (m_Data.m_Parameters.m_BiasEnabled) { - BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - optionalBiasTensor = &m_BiasTensor; + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); } arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, @@ -81,8 +84,8 @@ NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload(const Con { auto directConvolutionLayer = std::make_unique(memoryManager); directConvolutionLayer->configure(&input, - &m_KernelTensor, - optionalBiasTensor, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); m_ConvolutionLayer.reset(directConvolutionLayer.release()); @@ -91,22 +94,50 @@ NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload(const Con { auto convolutionLayer = std::make_unique(memoryManager); convolutionLayer->configure(&input, - &m_KernelTensor, - optionalBiasTensor, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); m_ConvolutionLayer.reset(convolutionLayer.release()); } BOOST_ASSERT(m_ConvolutionLayer); - using Type = ResolveType; + armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); + + switch (dataType) + { + case DataType::Float16: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::Float32: + { + 
InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::QuantisedAsymm8: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown DataType."); + } + } +} - InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); +template +void NeonConvolution2dBaseWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); } -// Generate known implementations for linker -template class NeonConvolution2dBaseWorkload; -template class NeonConvolution2dBaseWorkload; +// Generates known implementations for linker. +template class NeonConvolution2dBaseWorkload; +template class NeonConvolution2dBaseWorkload; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp index d28d50d819..524d2c90b6 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp @@ -25,11 +25,11 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, const TensorInfo& weights, const TensorInfo& biases); -template -class NeonConvolution2dBaseWorkload : public TypedWorkload +template +class NeonConvolution2dBaseWorkload : public TypedWorkload { public: - using TypedWorkload::m_Data; + using TypedWorkload::m_Data; NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager); @@ -38,8 +38,11 @@ public: protected: std::unique_ptr m_ConvolutionLayer; - arm_compute::Tensor m_KernelTensor; - arm_compute::Tensor m_BiasTensor; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); }; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp index f20f2a4ac5..18ec6ca2e7 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.cpp @@ -18,13 +18,16 @@ NeonConvolution2dFloat32Workload::NeonConvolution2dFloat32Workload(const Convolu { if (m_Data.m_Parameters.m_BiasEnabled) { - InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); } void NeonConvolution2dFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloat32Workload_Execute"); m_ConvolutionLayer->run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp index 56b0848efa..0bb8d69d94 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dFloat32Workload.hpp @@ -15,7 +15,7 @@ namespace armnn { -class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload +class NeonConvolution2dFloat32Workload : public NeonConvolution2dBaseWorkload { public: NeonConvolution2dFloat32Workload(const 
Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, diff --git a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp index fb91f7b7b2..bb33e939ea 100644 --- a/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp @@ -14,14 +14,16 @@ NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution { if (m_Data.m_Parameters.m_BiasEnabled) { - InitialiseArmComputeTensorData(m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); } -} + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} void NeonConvolution2dUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonConvolution2dUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); m_ConvolutionLayer->run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..58d6061537 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonDepthwiseConvolutionBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const TensorInfo& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +} diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..0cead354f8 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const TensorInfo& biases); + +} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp index 11e31c727a..f94cd903b6 100644 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.cpp @@ -16,23 +16,17 @@ using namespace armcomputetensorutils; NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - std::string reasonIfUnsupported; - if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo)) - { - throw UnimplementedException(reasonIfUnsupported); - } + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); - BuildArmComputeTensor(m_KernelTensor, weightInfo); - - arm_compute::Tensor* optionalBias = nullptr; if (m_Data.m_Parameters.m_BiasEnabled) { - BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - optionalBias = &m_BiasTensor; + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); } arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, @@ -54,8 +48,8 @@ NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload m_pDepthwiseConvolutionLayer = std::make_unique(); static_cast( m_pDepthwiseConvolutionLayer.get())->configure(&input, - &m_KernelTensor, - optionalBias, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); } @@ -64,28 +58,37 @@ NeonDepthwiseConvolutionFloat32Workload::NeonDepthwiseConvolutionFloat32Workload m_pDepthwiseConvolutionLayer = std::make_unique(); static_cast( m_pDepthwiseConvolutionLayer.get())->configure(&input, - &m_KernelTensor, - optionalBias, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); } BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); - if (optionalBias) + if (m_BiasTensor) { - InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor()); + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); } void NeonDepthwiseConvolutionFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "NeonDepthwiseConvolutionFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloat32Workload_Execute"); BOOST_ASSERT(m_pDepthwiseConvolutionLayer); m_pDepthwiseConvolutionLayer->run(); } +void NeonDepthwiseConvolutionFloat32Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp 
b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp index f9e295f568..ece9f1877b 100644 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionFloat32Workload.hpp @@ -10,7 +10,7 @@ namespace armnn { -class NeonDepthwiseConvolutionFloat32Workload : public Float32Workload +class NeonDepthwiseConvolutionFloat32Workload : public FloatWorkload { public: NeonDepthwiseConvolutionFloat32Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, @@ -20,8 +20,10 @@ public: private: mutable std::unique_ptr m_pDepthwiseConvolutionLayer; - arm_compute::Tensor m_KernelTensor; - arm_compute::Tensor m_BiasTensor; + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); }; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp index bd034c4f80..45fbcb37ab 100644 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -20,19 +20,13 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( { const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); - std::string reasonIfUnsupported; - if (!IsNeonDepthwiseConvolution2dDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters, weightInfo)) - { - throw UnimplementedException(reasonIfUnsupported); - } + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); - BuildArmComputeTensor(m_KernelTensor, weightInfo); - - arm_compute::Tensor* optionalBias = nullptr; if (m_Data.m_Parameters.m_BiasEnabled) { - BuildArmComputeTensor(m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); - optionalBias = &m_BiasTensor; + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); } arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, @@ -54,8 +48,8 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( m_pDepthwiseConvolutionLayer = std::make_unique(); static_cast( m_pDepthwiseConvolutionLayer.get())->configure(&input, - &m_KernelTensor, - optionalBias, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); } @@ -64,28 +58,37 @@ NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( m_pDepthwiseConvolutionLayer = std::make_unique(); static_cast( m_pDepthwiseConvolutionLayer.get())->configure(&input, - &m_KernelTensor, - optionalBias, + m_KernelTensor.get(), + m_BiasTensor.get(), &output, padStrideInfo); } BOOST_ASSERT(m_pDepthwiseConvolutionLayer); - InitialiseArmComputeTensorData(m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor()); - if (optionalBias) + if (m_BiasTensor) { - InitialiseArmComputeTensorData(*optionalBias, m_Data.m_Bias->GetConstTensor()); + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor()); } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); } void NeonDepthwiseConvolutionUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::GpuAcc, "NeonDepthwiseConvolutionUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); BOOST_ASSERT(m_pDepthwiseConvolutionLayer); 
m_pDepthwiseConvolutionLayer->run(); } +void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp index 9cf272e9f5..aca0ba5337 100644 --- a/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp @@ -20,8 +20,10 @@ public: private: mutable std::unique_ptr m_pDepthwiseConvolutionLayer; - arm_compute::Tensor m_KernelTensor; - arm_compute::Tensor m_BiasTensor; + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); }; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp index a5eec5cadb..c43cfa9c46 100644 --- a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.cpp @@ -9,7 +9,7 @@ namespace armnn { NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonFloorFloat32Workload", 1, 1); @@ -21,7 +21,7 @@ NeonFloorFloat32Workload::NeonFloorFloat32Workload(const FloorQueueDescriptor& d void NeonFloorFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFloorFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloat32Workload_Execute"); m_Layer.run(); } } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp index f876f1e1bb..56680f1e39 100644 --- a/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonFloorFloat32Workload.hpp @@ -10,7 +10,7 @@ namespace armnn { -class NeonFloorFloat32Workload : public Float32Workload +class NeonFloorFloat32Workload : public FloatWorkload { public: NeonFloorFloat32Workload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp index e1c4448642..c3af41e20d 100644 --- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.cpp @@ -4,16 +4,47 @@ // #include "NeonFullyConnectedFloat32Workload.hpp" -#include "backends/CpuTensorHandle.hpp" + #include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/CpuTensorHandle.hpp" namespace armnn { using namespace armcomputetensorutils; +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if 
(descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + + return arm_compute::NEFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) , m_FullyConnectedLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonFullyConnectedFloat32Workload", 1, 1); @@ -21,33 +52,45 @@ NeonFullyConnectedFloat32Workload::NeonFullyConnectedFloat32Workload(const Fully arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); - BuildArmComputeTensor(m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); - arm_compute::Tensor* optionalBiasTensor = nullptr; if (m_Data.m_Parameters.m_BiasEnabled) { - BuildArmComputeTensor(m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); - optionalBiasTensor = &m_BiasesTensor; + m_BiasesTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); } // Construct - m_FullyConnectedLayer.configure( - &input, &m_WeightsTensor, optionalBiasTensor, &output, m_Data.m_Parameters.m_TransposeWeightMatrix); + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); // Allocate - InitialiseArmComputeTensorData(m_WeightsTensor, m_Data.m_Weight->GetConstTensor()); + InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); - if (optionalBiasTensor) + if (m_BiasesTensor) { - InitialiseArmComputeTensorData(*optionalBiasTensor, m_Data.m_Bias->GetConstTensor()); + InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); } void NeonFullyConnectedFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonFullyConnectedFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloat32Workload_Execute"); m_FullyConnectedLayer.run(); } +void NeonFullyConnectedFloat32Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp index 9c722dc573..684b5e0753 100644 --- a/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp @@ -14,7 +14,13 @@ namespace armnn { -class NeonFullyConnectedFloat32Workload : public Float32Workload +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const 
TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class NeonFullyConnectedFloat32Workload : public FloatWorkload { public: NeonFullyConnectedFloat32Workload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, @@ -23,8 +29,11 @@ public: private: mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; - arm_compute::Tensor m_WeightsTensor; - arm_compute::Tensor m_BiasesTensor; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); }; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp index 9f79fa09de..a3ae33f41f 100644 --- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.cpp @@ -9,9 +9,21 @@ namespace armnn { +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) , m_Layer(memoryManager) { m_Data.ValidateInputsOutputs("NeonL2NormalizationFloat32Workload", 1, 1); @@ -23,7 +35,7 @@ NeonL2NormalizationFloat32Workload::NeonL2NormalizationFloat32Workload(const L2N void NeonL2NormalizationFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonL2NormalizationFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloat32Workload_Execute"); m_Layer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp index 2b4a1fef37..c3fcde5a57 100644 --- a/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp @@ -14,7 +14,10 @@ namespace armnn { -class NeonL2NormalizationFloat32Workload : public Float32Workload +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class NeonL2NormalizationFloat32Workload : public FloatWorkload { public: NeonL2NormalizationFloat32Workload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp new file mode 100644 index 0000000000..ba1369e179 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#include "NeonLstmFloat32Workload.hpp" + +namespace armnn +{ +NeonLstmFloat32Workload::NeonLstmFloat32Workload(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonLstmFloat32Workload", 1, 1); +} + +void NeonLstmFloat32Workload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Neon backend!"); +} + +} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp new file mode 100644 index 0000000000..78ee1da341 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonLstmFloat32Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonLstmFloat32Workload : public FloatWorkload +{ +public: + NeonLstmFloat32Workload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp index 7520e8768e..30dd283620 100644 --- a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonMergerFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClMergerFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloat32Workload_Execute"); NeonBaseMergerWorkload::Execute(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp index 5c889c2af0..7b8ee9881f 100644 --- a/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonMergerFloat32Workload.hpp @@ -10,10 +10,10 @@ namespace armnn { -class NeonMergerFloat32Workload : public NeonBaseMergerWorkload +class NeonMergerFloat32Workload : public NeonBaseMergerWorkload { public: - using NeonBaseMergerWorkload::NeonBaseMergerWorkload; + using NeonBaseMergerWorkload::NeonBaseMergerWorkload; virtual void Execute() const override; }; diff --git a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp index 51578e5bff..caccdd443a 100644 --- a/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonMergerUint8Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonMergerUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClMergerUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute"); NeonBaseMergerWorkload::Execute(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp index 58ce7b74ba..a8a3cd77b4 100644 --- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.cpp @@ -9,9 +9,28 @@ namespace armnn { +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = 
armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonMultiplicationFloat32Workload", 2, 1); @@ -32,7 +51,7 @@ NeonMultiplicationFloat32Workload::NeonMultiplicationFloat32Workload(const Multi void NeonMultiplicationFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonMultiplicationFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloat32Workload_Execute"); m_PixelWiseMultiplication.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp index ed5ead3700..62e84a2e07 100644 --- a/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonMultiplicationFloat32Workload.hpp @@ -9,8 +9,11 @@ namespace armnn { +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); -class NeonMultiplicationFloat32Workload : public Float32Workload +class NeonMultiplicationFloat32Workload : public FloatWorkload { public: NeonMultiplicationFloat32Workload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp index 0fd0dcc420..20936a2760 100644 --- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.cpp @@ -6,13 +6,28 @@ #include "NeonNormalizationFloat32Workload.hpp" #include "backends/NeonLayerSupport.hpp" #include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" namespace armnn { +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) - : Float32Workload(descriptor, info) + const WorkloadInfo& info, + std::shared_ptr& 
memoryManager) + : FloatWorkload(descriptor, info) , m_NormalizationLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonNormalizationFloat32Workload", 1, 1); @@ -22,7 +37,7 @@ NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const Normali throw UnimplementedException(reasonIfUnsupported); } - // input and output tensors have to have the same dimensionality + // Input and output tensors have to have the same dimensionality. if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3] @@ -48,7 +63,7 @@ NeonNormalizationFloat32Workload::NeonNormalizationFloat32Workload(const Normali void NeonNormalizationFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonNormalizationFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloat32Workload_Execute"); m_NormalizationLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp index 24b6da8528..8f0823454b 100644 --- a/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonNormalizationFloat32Workload.hpp @@ -12,7 +12,11 @@ namespace armnn { -class NeonNormalizationFloat32Workload : public Float32Workload +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class NeonNormalizationFloat32Workload : public FloatWorkload { public: NeonNormalizationFloat32Workload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp index e0a0457422..c27797ee4e 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.cpp @@ -24,10 +24,10 @@ arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, armcomputetensorutils::BuildArmComputePermutationVector(mappings)); } -template -NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, +template +NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info) - : TypedWorkload(descriptor, info) + : TypedWorkload(descriptor, info) { using armcomputetensorutils::BuildArmComputePermutationVector; @@ -37,18 +37,18 @@ NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - // Run the layer + // Run the layer. 
m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); } -template -void NeonPermuteWorkload::Execute() const +template +void NeonPermuteWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, GetName() + "_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); m_PermuteFunction.run(); } -template class NeonPermuteWorkload; +template class NeonPermuteWorkload; template class NeonPermuteWorkload; } // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp index 56e8719d6c..06b2dc692b 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonPermuteWorkload.hpp @@ -7,6 +7,7 @@ #include "backends/Workload.hpp" #include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" #include #include @@ -18,13 +19,13 @@ namespace armnn arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, const PermuteDescriptor& descriptor); -template -class NeonPermuteWorkload : public TypedWorkload +template +class NeonPermuteWorkload : public TypedWorkload { public: static const std::string& GetName() { - static const std::string name = std::string("NeonPermute") + GetDataTypeName(DataType) + "Workload"; + static const std::string name = std::string("NeonPermuteWorkload"); return name; } @@ -32,11 +33,11 @@ public: void Execute() const override; private: - using TypedWorkload::m_Data; + using TypedWorkload::m_Data; mutable arm_compute::NEPermute m_PermuteFunction; }; -using NeonPermuteFloat32Workload = NeonPermuteWorkload; +using NeonPermuteFloatWorkload = NeonPermuteWorkload; using NeonPermuteUint8Workload = NeonPermuteWorkload; -} //namespace armnn +} // namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp index 6d6a492155..3585d36ba3 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp @@ -25,10 +25,10 @@ arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); } -template -NeonPooling2dBaseWorkload::NeonPooling2dBaseWorkload( +template +NeonPooling2dBaseWorkload::NeonPooling2dBaseWorkload( const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) - : TypedWorkload(descriptor, info) + : TypedWorkload(descriptor, info) { m_Data.ValidateInputsOutputs(name, 1, 1); @@ -40,7 +40,7 @@ NeonPooling2dBaseWorkload::NeonPooling2dBaseWorkload( m_PoolingLayer.configure(&input, &output, layerInfo); } -template class NeonPooling2dBaseWorkload; +template class NeonPooling2dBaseWorkload; template class NeonPooling2dBaseWorkload; } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp index 9461982f86..2e85e937fa 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp @@ -14,12 +14,12 @@ arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, const TensorInfo& output, const Pooling2dDescriptor& descriptor); -// Base class template providing an implementation of the Pooling2d layer common to all data types -template 
-class NeonPooling2dBaseWorkload : public TypedWorkload +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template +class NeonPooling2dBaseWorkload : public TypedWorkload { public: - using TypedWorkload::m_Data; + using TypedWorkload::m_Data; NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name); diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp index ba2aa20924..cb690c51b8 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.cpp @@ -12,13 +12,14 @@ namespace armnn NeonPooling2dFloat32Workload::NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) - : NeonPooling2dBaseWorkload(descriptor, info, "NeonPooling2dFloat32Workload") + : NeonPooling2dBaseWorkload(descriptor, info, + "NeonPooling2dFloat32Workload") { } void NeonPooling2dFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloat32Workload_Execute"); m_PoolingLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp index 6cfc9cc96f..36c4e7edf1 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dFloat32Workload.hpp @@ -11,7 +11,8 @@ namespace armnn { -class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload +class NeonPooling2dFloat32Workload : public NeonPooling2dBaseWorkload { public: NeonPooling2dFloat32Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp index 0778794081..3e06d08dea 100644 --- a/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp @@ -18,7 +18,7 @@ NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescr void NeonPooling2dUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonPooling2dUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute"); m_PoolingLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp index 317d16f6bd..93f6eb8ef5 100644 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.cpp @@ -12,7 +12,7 @@ namespace armnn NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonReshapeFloat32Workload", 1, 1); @@ -24,7 +24,7 @@ NeonReshapeFloat32Workload::NeonReshapeFloat32Workload(const ReshapeQueueDescrip void NeonReshapeFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloat32Workload_Execute"); m_Layer.run(); } diff --git 
a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp index 27f4aea9e7..3e5cca1b9e 100644 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeFloat32Workload.hpp @@ -10,7 +10,7 @@ namespace armnn { -class NeonReshapeFloat32Workload : public Float32Workload +class NeonReshapeFloat32Workload : public FloatWorkload { public: NeonReshapeFloat32Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); diff --git a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp index 06f57c1e0f..b31bdcd3d0 100644 --- a/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp @@ -24,7 +24,7 @@ NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& void NeonReshapeUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonReshapeUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute"); m_Layer.run(); } } //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..3efffafe25 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. +// + +#include "NeonSoftmaxBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta); +} + +} //namespace armnn + diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..b9b21fb254 --- /dev/null +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// See LICENSE file in the project root for full license information. 
+// + +#pragma once + +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor); + +} //namespace armnn diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp index 5e2925ca02..027b508ad5 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.cpp @@ -10,12 +10,12 @@ namespace armnn NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager) - : Float32Workload(descriptor, info) + : FloatWorkload(descriptor, info) , m_SoftmaxLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonSoftmaxFloat32Workload", 1, 1); - // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions + // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions. arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); @@ -24,7 +24,7 @@ NeonSoftmaxFloat32Workload::NeonSoftmaxFloat32Workload(const SoftmaxQueueDescrip void NeonSoftmaxFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSoftmaxFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloat32Workload_Execute"); m_SoftmaxLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp index 91d25b47f8..3656a26a3c 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxFloat32Workload.hpp @@ -14,7 +14,7 @@ namespace armnn { -class NeonSoftmaxFloat32Workload : public Float32Workload +class NeonSoftmaxFloat32Workload : public FloatWorkload { public: NeonSoftmaxFloat32Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, diff --git a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp index eb4a23c13c..4b0c05b25b 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp @@ -32,7 +32,7 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& void NeonSoftmaxUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "ClSoftmaxUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); m_SoftmaxLayer.run(); } diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp index 13701d2ed3..996fc15adb 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonSplitterFloat32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterFloat32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloat32Workload_Execute"); NeonBaseSplitterWorkload::Execute(); } diff --git 
a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp index 432f5de4eb..9f6dc75499 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterFloat32Workload.hpp @@ -10,10 +10,10 @@ namespace armnn { -class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload +class NeonSplitterFloat32Workload : public NeonBaseSplitterWorkload { public: - using NeonBaseSplitterWorkload::NeonBaseSplitterWorkload; + using NeonBaseSplitterWorkload::NeonBaseSplitterWorkload; virtual void Execute() const override; }; diff --git a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp index 90d24d3ffd..0d6328ff7e 100644 --- a/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp +++ b/src/armnn/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp @@ -10,7 +10,7 @@ namespace armnn void NeonSplitterUint8Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, "NeonSplitterUint8Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute"); NeonBaseSplitterWorkload::Execute(); } -- cgit v1.2.1
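
The per-workload validate helpers added in this change (NeonMultiplicationWorkloadValidate, NeonNormalizationWorkloadValidate, NeonL2NormalizationWorkloadValidate, NeonSoftmaxWorkloadValidate) all follow one shape: build arm_compute::TensorInfo descriptions of the ArmNN tensors, then delegate to the corresponding NE*Layer::validate() call and return its arm_compute::Status. The following is a minimal sketch of how a caller might turn that Status into a bool-plus-reason check; the wrapper name, the out-parameter, and the surrounding includes are illustrative assumptions, not the exact ArmNN layer-support API.

    // Illustrative sketch only: converts the arm_compute::Status returned by a
    // validate helper (here NeonMultiplicationWorkloadValidate, as added above)
    // into a bool plus a human-readable reason string.
    #include <arm_compute/core/Error.h>   // arm_compute::Status, arm_compute::ErrorCode
    #include <armnn/Tensor.hpp>           // armnn::TensorInfo
    #include <string>

    bool IsNeonMultiplicationSupportedSketch(const armnn::TensorInfo& input0,
                                             const armnn::TensorInfo& input1,
                                             const armnn::TensorInfo& output,
                                             std::string* reasonIfUnsupported)
    {
        const arm_compute::Status status =
            armnn::NeonMultiplicationWorkloadValidate(input0, input1, output);

        const bool supported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!supported && reasonIfUnsupported != nullptr)
        {
            // error_description() carries the message set by the failing validate() call.
            *reasonIfUnsupported = status.error_description();
        }
        return supported;
    }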
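
In the fully connected workload, the weights and bias members are now held through std::unique_ptr and released by a new FreeUnusedTensors() helper, so the constant CPU-side buffers can be freed once the ACL layer has been configured and its constants prepared. A minimal, self-contained sketch of that idiom follows, assuming arm_compute::Tensor as the pointee type and assuming the layer no longer reads these buffers after preparation.

    // Self-contained sketch of the unique_ptr + FreeUnusedTensors() idiom.
    // Assumption: the ACL layer has already been configured (and its constant
    // inputs prepared) before FreeUnusedTensors() runs.
    #include <arm_compute/runtime/Tensor.h>
    #include <memory>

    class ExampleNeonFullyConnectedWorkloadSketch
    {
    public:
        void FreeUnusedTensors()
        {
            // Resetting the unique_ptrs destroys the arm_compute::Tensor objects
            // and frees the buffers that held the constant weights and biases.
            m_WeightsTensor.reset();
            m_BiasesTensor.reset();
        }

    private:
        std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
        std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
    };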
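
Every Execute() implementation in this change swaps ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuAcc, ...) for ARMNN_SCOPED_PROFILING_EVENT_NEON(...), which also corrects the copy-pasted "Cl..." event names in the merger and softmax uint8 workloads. The macro itself is defined outside this hunk (presumably in backends/NeonWorkloadUtils.hpp, which NeonPermuteWorkload.hpp now includes); one plausible definition, shown here purely as an assumption, is a thin wrapper that pins the compute device:

    // Assumed definition, for illustration only; the real macro may differ.
    // It avoids repeating the Compute::CpuAcc argument in every Neon workload
    // and keeps the event name as the single per-call parameter.
    #define ARMNN_SCOPED_PROFILING_EVENT_NEON(name) \
        ARMNN_SCOPED_PROFILING_EVENT(armnn::Compute::CpuAcc, name)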
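
Several class templates in this section (NeonPermuteWorkload, NeonPooling2dBaseWorkload, and the merger and splitter base workloads) move from a single data-type template parameter to a pack of supported data types, which is what lets the old Float32-only aliases become Float workloads covering both FP16 and FP32 (for example NeonPermuteFloatWorkload replacing NeonPermuteFloat32Workload). The stand-alone sketch below shows the underlying C++ idea with illustrative names; it is not the ArmNN TypedWorkload definition.

    // Stand-alone sketch of parameterising a workload on a pack of data types.
    // All names here are illustrative; ArmNN's TypedWorkload is more involved.
    enum class ExampleDataType { Float16, Float32, QuantisedAsymm8 };

    template <ExampleDataType... SupportedTypes>
    class ExampleTypedWorkload
    {
    public:
        // Returns true if 'type' matches any element of the parameter pack.
        static bool Supports(ExampleDataType type)
        {
            const ExampleDataType supported[] = { SupportedTypes... };
            for (ExampleDataType t : supported)
            {
                if (t == type)
                {
                    return true;
                }
            }
            return false;
        }
    };

    // Mirrors the aliases in the diff: one float workload spanning FP16 and FP32,
    // and a separate uint8 workload for the quantised data type.
    using ExampleFloatWorkload = ExampleTypedWorkload<ExampleDataType::Float16,
                                                      ExampleDataType::Float32>;
    using ExampleUint8Workload = ExampleTypedWorkload<ExampleDataType::QuantisedAsymm8>;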