diff options
Diffstat (limited to 'src/backends/neon')
35 files changed, 238 insertions, 88 deletions
diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp index 6e95678d13..c75a138301 100644 --- a/src/backends/neon/workloads/NeonActivationWorkload.cpp +++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp @@ -4,8 +4,11 @@ // #include "NeonActivationWorkload.hpp" +#include "NeonWorkloadUtils.hpp" #include <aclCommon/ArmComputeUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEActivationLayer.h> + namespace armnn { @@ -43,13 +46,16 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor& arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_ActivationLayer.configure(&input, &output, activationLayerInfo); + auto layer = std::make_unique<arm_compute::NEActivationLayer>(); + layer->configure(&input, &output, activationLayerInfo); + + m_ActivationLayer.reset(layer.release()); } void NeonActivationWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationWorkload_Execute"); - m_ActivationLayer.run(); + m_ActivationLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonActivationWorkload.hpp b/src/backends/neon/workloads/NeonActivationWorkload.hpp index fc7c6467dc..eefbfb6522 100644 --- a/src/backends/neon/workloads/NeonActivationWorkload.hpp +++ b/src/backends/neon/workloads/NeonActivationWorkload.hpp @@ -5,7 +5,10 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> namespace armnn { @@ -21,7 +24,7 @@ public: void Execute() const override; private: - mutable arm_compute::NEActivationLayer m_ActivationLayer; + std::unique_ptr<arm_compute::IFunction> m_ActivationLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp index 70a3909091..fa537819a9 100644 --- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp +++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp @@ -4,9 +4,13 @@ // #include "NeonAdditionWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + #include <aclCommon/ArmComputeTensorUtils.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h> + namespace armnn { @@ -35,13 +39,15 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); + auto layer = std::make_unique<arm_compute::NEArithmeticAddition>(); + layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); + m_AddLayer.reset(layer.release()); } void NeonAdditionWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionWorkload_Execute"); - m_AddLayer.run(); + m_AddLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.hpp b/src/backends/neon/workloads/NeonAdditionWorkload.hpp index ca8ae8d7b7..826fb1f3dd 100644 --- a/src/backends/neon/workloads/NeonAdditionWorkload.hpp +++ b/src/backends/neon/workloads/NeonAdditionWorkload.hpp @@ -5,7 +5,10 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> namespace armnn { @@ -21,7 +24,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEArithmeticAddition m_AddLayer; + std::unique_ptr<arm_compute::IFunction> m_AddLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp index 44d5035431..fc80f413e8 100644 --- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp +++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp @@ -4,9 +4,13 @@ // #include "NeonBatchNormalizationWorkload.hpp" + +#include "NeonWorkloadUtils.hpp" + #include <backendsCommon/CpuTensorHandle.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> -#include <armnn/ArmNN.hpp> + +#include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h> namespace armnn { @@ -68,13 +72,15 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload( m_Beta = std::make_unique<arm_compute::Tensor>(); BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); - m_Layer.configure(&input, - &output, - m_Mean.get(), - m_Variance.get(), - m_Beta.get(), - m_Gamma.get(), - m_Data.m_Parameters.m_Eps); + auto layer = std::make_unique<arm_compute::NEBatchNormalizationLayer>(); + layer->configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + m_Layer.reset(layer.release()); InitializeArmComputeTensorData(*m_Mean, m_Data.m_Mean); InitializeArmComputeTensorData(*m_Variance, m_Data.m_Variance); @@ -83,14 +89,14 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload( // Force Compute Library to perform the necessary copying and reshaping, after which // delete all the input tensors that will no longer be needed - m_Layer.prepare(); + m_Layer->prepare(); FreeUnusedTensors(); } void NeonBatchNormalizationWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationWorkload_Execute"); - m_Layer.run(); + m_Layer->run(); } void NeonBatchNormalizationWorkload::FreeUnusedTensors() diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp index 52e4db7c90..3619ea0d73 100644 --- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp +++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/Tensor.h> + +#include <memory> namespace armnn { @@ -26,7 +31,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEBatchNormalizationLayer m_Layer; + std::unique_ptr<arm_compute::IFunction> m_Layer; std::unique_ptr<arm_compute::Tensor> m_Mean; std::unique_ptr<arm_compute::Tensor> m_Variance; diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp index 151132f04d..1080f320e7 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp @@ -7,7 +7,9 @@ #include <backendsCommon/CpuTensorHandle.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> -#include <neon/NeonLayerSupport.hpp> +#include <neon/workloads/NeonWorkloadUtils.hpp> + +#include <arm_compute/runtime/NEON/functions/NEConvolutionLayer.h> #include <armnn/Types.hpp> #include <Half.hpp> diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp index daf9a43fe4..3fb408dbaa 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp +++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp @@ -5,12 +5,10 @@ #pragma once -#include <aclCommon/ArmComputeTensorUtils.hpp> -#include <backendsCommon/CpuTensorHandle.hpp> -#include <neon/NeonLayerSupport.hpp> -#include <neon/workloads/NeonWorkloadUtils.hpp> #include <backendsCommon/Workload.hpp> +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/Tensor.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> #include <memory> diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index be26359662..c915555dd7 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -5,11 +5,18 @@ #include "NeonDepthwiseConvolutionWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <DataLayoutIndexed.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> #include <neon/NeonLayerSupport.hpp> #include <backendsCommon/CpuTensorHandle.hpp> #include <backendsCommon/WorkloadUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h> + +using namespace armnnUtils; + namespace armnn { diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp index b5f2ae9223..85932d3f9a 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/Tensor.h> + +#include <memory> namespace armnn { diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp index a08ba8a6ec..f024fef2a8 100644 --- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp @@ -5,6 +5,12 @@ #include "NeonFloorFloatWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <arm_compute/runtime/NEON/functions/NEFloor.h> + +#include <boost/polymorphic_cast.hpp> + namespace armnn { NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, @@ -16,13 +22,15 @@ NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descr arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); + auto layer = std::make_unique<arm_compute::NEFloor>(); + layer->configure(&input, &output); + m_Layer.reset(layer.release()); } void NeonFloorFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); - m_Layer.run(); + m_Layer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp index a4ce47663c..01b86a6706 100644 --- a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> +#include <arm_compute/runtime/Tensor.h> + +#include <memory> namespace armnn { @@ -17,7 +22,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEFloor m_Layer; + std::unique_ptr<arm_compute::IFunction> m_Layer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp index e432a6b833..7395270400 100644 --- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp @@ -5,10 +5,13 @@ #include "NeonFullyConnectedWorkload.hpp" +#include "NeonWorkloadUtils.hpp" #include <aclCommon/ArmComputeTensorUtils.hpp> #include <aclCommon/ArmComputeUtils.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h> + namespace armnn { using namespace armcomputetensorutils; @@ -45,7 +48,6 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) - , m_FullyConnectedLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1); @@ -64,7 +66,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue // Construct arm_compute::FullyConnectedLayerInfo fc_info; fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; - m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager); + layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + m_FullyConnectedLayer.reset(layer.release()); // Allocate if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) @@ -90,14 +95,14 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue // Force Compute Library to perform the necessary copying and reshaping, after which // delete all the input tensors that will no longer be needed - m_FullyConnectedLayer.prepare(); + m_FullyConnectedLayer->prepare(); FreeUnusedTensors(); } void NeonFullyConnectedWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute"); - m_FullyConnectedLayer.run(); + m_FullyConnectedLayer->run(); } void NeonFullyConnectedWorkload::FreeUnusedTensors() diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp index ec1661d642..1cd8be109a 100644 --- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp @@ -5,9 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> +#include <arm_compute/runtime/Tensor.h> #include <memory> @@ -28,7 +31,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + std::unique_ptr<arm_compute::IFunction> m_FullyConnectedLayer; std::unique_ptr<arm_compute::Tensor> m_WeightsTensor; std::unique_ptr<arm_compute::Tensor> m_BiasesTensor; diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp index afaa700624..99bbcfa824 100644 --- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp @@ -4,8 +4,13 @@ // #include "NeonL2NormalizationFloatWorkload.hpp" + +#include "NeonWorkloadUtils.hpp" + #include <aclCommon/ArmComputeUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEL2NormalizeLayer.h> + namespace armnn { using namespace armcomputetensorutils; @@ -25,7 +30,6 @@ arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) - , m_Layer(memoryManager) { m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); @@ -38,13 +42,15 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma unsigned int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0; - m_Layer.configure(&input, &output, axis); + auto layer = std::make_unique<arm_compute::NEL2NormalizeLayer>(memoryManager); + layer->configure(&input, &output, axis); + m_Layer.reset(layer.release()); } void NeonL2NormalizationFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); - m_Layer.run(); + m_Layer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp index 30058c571f..2a8eb38ef9 100644 --- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp @@ -5,7 +5,10 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> #include <memory> @@ -25,7 +28,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEL2NormalizeLayer m_Layer; + std::unique_ptr<arm_compute::IFunction> m_Layer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerWorkload.cpp b/src/backends/neon/workloads/NeonMergerWorkload.cpp index f82e24453a..be096b4b25 100644 --- a/src/backends/neon/workloads/NeonMergerWorkload.cpp +++ b/src/backends/neon/workloads/NeonMergerWorkload.cpp @@ -4,11 +4,14 @@ // #include "NeonMergerWorkload.hpp" -#include <armnn/ArmNN.hpp> + +#include "NeonWorkloadUtils.hpp" + #include <aclCommon/ArmComputeTensorUtils.hpp> #include <backendsCommon/CpuTensorHandle.hpp> #include <neon/NeonTensorHandle.hpp> +#include <arm_compute/runtime/NEON/functions/NEConcatenateLayer.h> namespace armnn { @@ -66,9 +69,11 @@ const MergerQueueDescriptor& descriptor, const WorkloadInfo& info) arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH; - m_Layer.configure(aclInputs, &output, aclAxis); + auto layer = std::make_unique<arm_compute::NEConcatenateLayer>(); + layer->configure(aclInputs, &output, aclAxis); + m_Layer.reset(layer.release()); - m_Layer.prepare(); + m_Layer->prepare(); } void NeonMergerWorkload::Execute() const @@ -76,7 +81,7 @@ void NeonMergerWorkload::Execute() const if (m_Execute) { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerWorkload_Execute"); - m_Layer.run(); + m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonMergerWorkload.hpp b/src/backends/neon/workloads/NeonMergerWorkload.hpp index a4f36d18bc..3432c626cb 100644 --- a/src/backends/neon/workloads/NeonMergerWorkload.hpp +++ b/src/backends/neon/workloads/NeonMergerWorkload.hpp @@ -6,7 +6,11 @@ #pragma once #include <backendsCommon/Workload.hpp> -#include <neon/workloads/NeonWorkloadUtils.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> +# +#include <memory> namespace armnn { @@ -23,7 +27,7 @@ public: void Execute() const override; private: - mutable arm_compute::NEConcatenateLayer m_Layer; + std::unique_ptr<arm_compute::IFunction> m_Layer; bool m_Execute; }; diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp index c4241ece19..778e78213c 100644 --- a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp @@ -5,6 +5,9 @@ #include "NeonMultiplicationFloatWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h> namespace armnn { @@ -41,18 +44,20 @@ NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const Multiplic // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be // ignored for F32 tensors. - m_PixelWiseMultiplication.configure(&input1, - &input2, - &output, - 1.0f, - arm_compute::ConvertPolicy::SATURATE, - arm_compute::RoundingPolicy::TO_ZERO); + auto layer = std::make_unique<arm_compute::NEPixelWiseMultiplication>(); + layer->configure(&input1, + &input2, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); + m_PixelWiseMultiplication.reset(layer.release()); } void NeonMultiplicationFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute"); - m_PixelWiseMultiplication.run(); + m_PixelWiseMultiplication->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp index 8fa3171110..a65ad4ec4c 100644 --- a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> + +#include <memory> namespace armnn { @@ -20,7 +25,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; + std::unique_ptr<arm_compute::IFunction> m_PixelWiseMultiplication; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp index 854ecd3c59..92c0396d86 100644 --- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp @@ -4,10 +4,13 @@ // #include "NeonNormalizationFloatWorkload.hpp" -#include <neon/NeonLayerSupport.hpp> + +#include "NeonWorkloadUtils.hpp" #include <aclCommon/ArmComputeUtils.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NENormalizationLayer.h> + using namespace armnn::armcomputetensorutils; namespace armnn @@ -57,7 +60,6 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) - , m_NormalizationLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); std::string reasonIfUnsupported; @@ -89,14 +91,15 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati m_Data.m_Parameters.m_Beta, m_Data.m_Parameters.m_K, false); - - m_NormalizationLayer.configure(&input, &output, normalizationInfo); + auto layer = std::make_unique<arm_compute::NENormalizationLayer>(memoryManager); + layer->configure(&input, &output, normalizationInfo); + m_NormalizationLayer.reset(layer.release()); } void NeonNormalizationFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); - m_NormalizationLayer.run(); + m_NormalizationLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp index 89eba573da..17bbeb41ab 100644 --- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp @@ -5,9 +5,14 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> +#include <memory> + namespace armnn { @@ -23,7 +28,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NENormalizationLayer m_NormalizationLayer; + std::unique_ptr<arm_compute::IFunction> m_NormalizationLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp index 9c8f71ad19..75bceb1bc7 100644 --- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp @@ -4,11 +4,15 @@ // #include "NeonPooling2dWorkload.hpp" -#include <neon/NeonLayerSupport.hpp> + +#include "NeonWorkloadUtils.hpp" + #include <neon/NeonTensorHandle.hpp> #include <aclCommon/ArmComputeUtils.hpp> #include <aclCommon/ArmComputeTensorUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEPoolingLayer.h> + namespace armnn { using namespace armcomputetensorutils; @@ -42,13 +46,15 @@ NeonPooling2dWorkload::NeonPooling2dWorkload( arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); - m_PoolingLayer.configure(&input, &output, layerInfo); + auto layer = std::make_unique<arm_compute::NEPoolingLayer>(); + layer->configure(&input, &output, layerInfo); + m_PoolingLayer.reset(layer.release()); } void NeonPooling2dWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dWorkload_Execute"); - m_PoolingLayer.run(); + m_PoolingLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.hpp b/src/backends/neon/workloads/NeonPooling2dWorkload.hpp index b2379f7f53..b0e3aa8c59 100644 --- a/src/backends/neon/workloads/NeonPooling2dWorkload.hpp +++ b/src/backends/neon/workloads/NeonPooling2dWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> + +#include <memory> namespace armnn { @@ -24,7 +29,7 @@ public: void Execute() const override; private: - mutable arm_compute::NEPoolingLayer m_PoolingLayer; + std::unique_ptr<arm_compute::IFunction> m_PoolingLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp index c2dcdd500c..40fbef6520 100644 --- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp +++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp @@ -5,6 +5,12 @@ #include "NeonReshapeWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <arm_compute/runtime/NEON/functions/NEReshapeLayer.h> + +#include <boost/polymorphic_cast.hpp> + namespace armnn { @@ -17,13 +23,15 @@ NeonReshapeWorkload::NeonReshapeWorkload(const ReshapeQueueDescriptor& descripto arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input, &output); + auto layer = std::make_unique<arm_compute::NEReshapeLayer>(); + layer->configure(&input, &output); + m_Layer.reset(layer.release()); } void NeonReshapeWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeWorkload_Execute"); - m_Layer.run(); + m_Layer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.hpp b/src/backends/neon/workloads/NeonReshapeWorkload.hpp index 38b6c510d2..2202463928 100644 --- a/src/backends/neon/workloads/NeonReshapeWorkload.hpp +++ b/src/backends/neon/workloads/NeonReshapeWorkload.hpp @@ -5,7 +5,11 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> + +#include <memory> namespace armnn { @@ -18,7 +22,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEReshapeLayer m_Layer; + std::unique_ptr<arm_compute::IFunction> m_Layer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp index 434de874ae..b229bc48a2 100644 --- a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp @@ -7,6 +7,8 @@ #include <aclCommon/ArmComputeTensorUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> + namespace armnn { diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp index 6e96c2d2b4..6eecb9787d 100644 --- a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp @@ -5,7 +5,8 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <armnn/Descriptors.hpp> +#include <arm_compute/core/Error.h> namespace armnn { diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp index 92e5139c1a..d9c78bbd43 100644 --- a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp @@ -5,13 +5,16 @@ #include "NeonSoftmaxFloatWorkload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> + namespace armnn { NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1); @@ -19,13 +22,15 @@ NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); + auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager); + layer->configure(&input, &output, m_Data.m_Parameters.m_Beta); + m_SoftmaxLayer.reset(layer.release()); } void NeonSoftmaxFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute"); - m_SoftmaxLayer.run(); + m_SoftmaxLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp index 9c11b27bb5..77f2cc3d01 100644 --- a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp @@ -5,7 +5,9 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> #include <memory> @@ -21,7 +23,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; + std::unique_ptr<arm_compute::IFunction> m_SoftmaxLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp index cff869c9b7..f780589075 100644 --- a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp @@ -5,6 +5,10 @@ #include "NeonSoftmaxUint8Workload.hpp" +#include "NeonWorkloadUtils.hpp" + +#include <arm_compute/runtime/NEON/functions/NESoftmaxLayer.h> + namespace armnn { @@ -12,7 +16,6 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) - , m_SoftmaxLayer(memoryManager) { m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); @@ -27,14 +30,16 @@ NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); } - m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); + auto layer = std::make_unique<arm_compute::NESoftmaxLayer>(memoryManager); + layer->configure(&input, &output, descriptor.m_Parameters.m_Beta); + m_SoftmaxLayer.reset(layer.release()); } void NeonSoftmaxUint8Workload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); - m_SoftmaxLayer.run(); + m_SoftmaxLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp index b3bcbf38b5..c5692084c9 100644 --- a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp @@ -5,9 +5,13 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/runtime/IFunction.h> #include <arm_compute/runtime/MemoryManagerOnDemand.h> +#include <memory> + namespace armnn { @@ -19,7 +23,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; + std::unique_ptr<arm_compute::IFunction> m_SoftmaxLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp index 1eae0a49ce..e39f8aa18e 100644 --- a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp @@ -4,9 +4,13 @@ // #include "NeonSubtractionFloatWorkload.hpp" + +#include "NeonWorkloadUtils.hpp" #include <aclCommon/ArmComputeTensorUtils.hpp> #include <backendsCommon/CpuTensorHandle.hpp> +#include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h> + namespace armnn { @@ -34,13 +38,15 @@ NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueu arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); - m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); + auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>(); + layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); + m_SubLayer.reset(layer.release()); } void NeonSubtractionFloatWorkload::Execute() const { ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute"); - m_SubLayer.run(); + m_SubLayer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp index 09016993e0..5dce112299 100644 --- a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp @@ -5,7 +5,12 @@ #pragma once -#include <neon/workloads/NeonWorkloadUtils.hpp> +#include <backendsCommon/Workload.hpp> + +#include <arm_compute/core/Error.h> +#include <arm_compute/runtime/IFunction.h> + +#include <memory> namespace armnn { @@ -21,7 +26,7 @@ public: virtual void Execute() const override; private: - mutable arm_compute::NEArithmeticSubtraction m_SubLayer; + std::unique_ptr<arm_compute::IFunction> m_SubLayer; }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.hpp b/src/backends/neon/workloads/NeonWorkloadUtils.hpp index 17e14cdd99..22ffece6a2 100644 --- a/src/backends/neon/workloads/NeonWorkloadUtils.hpp +++ b/src/backends/neon/workloads/NeonWorkloadUtils.hpp @@ -9,7 +9,6 @@ #include <neon/NeonTensorHandle.hpp> #include <neon/NeonTimer.hpp> #include <backendsCommon/CpuTensorHandle.hpp> -#include <arm_compute/runtime/NEON/NEFunctions.h> #include <Half.hpp> |