From 10b4dfd8e9ccd7a03df7bb053ee1c644cb37f8ab Mon Sep 17 00:00:00 2001
From: David Beck
Date: Wed, 19 Sep 2018 12:03:20 +0100
Subject: IVGCVSW-1897 : build infrastructure for the src/backends folder

Change-Id: I7ebafb675ccc77ad54d1deb01412a8379a5356bb
---
 src/backends/NeonWorkloads/CMakeLists.txt | 83 ++++++++++
 .../NeonWorkloads/NeonActivationFloatWorkload.cpp | 57 ++++++++
 .../NeonWorkloads/NeonActivationFloatWorkload.hpp | 29 ++++
 .../NeonWorkloads/NeonActivationUint8Workload.cpp | 35 +++++
 .../NeonWorkloads/NeonActivationUint8Workload.hpp | 28 ++++
 .../NeonWorkloads/NeonAdditionFloatWorkload.cpp | 48 +++++++
 .../NeonWorkloads/NeonAdditionFloatWorkload.hpp | 30 +++++
 .../NeonWorkloads/NeonBaseConstantWorkload.hpp | 83 ++++++++++++
 .../NeonWorkloads/NeonBaseMergerWorkload.hpp | 26 ++++
 .../NeonWorkloads/NeonBaseSplitterWorkload.hpp | 27 ++++
 .../NeonBatchNormalizationFloatWorkload.cpp | 96 ++++++++++++++
 .../NeonBatchNormalizationFloatWorkload.hpp | 42 ++++++
 .../NeonWorkloads/NeonConstantFloatWorkload.cpp | 17 +++
 .../NeonWorkloads/NeonConstantFloatWorkload.hpp | 20 +++
 .../NeonWorkloads/NeonConstantUint8Workload.cpp | 17 +++
 .../NeonWorkloads/NeonConstantUint8Workload.hpp | 20 +++
 .../NeonConvertFp16ToFp32Workload.cpp | 41 ++++++
 .../NeonConvertFp16ToFp32Workload.hpp | 26 ++++
 .../NeonConvertFp32ToFp16Workload.cpp | 43 ++++++
 .../NeonConvertFp32ToFp16Workload.hpp | 26 ++++
 .../NeonConvolution2dBaseWorkload.cpp | 146 +++++++++++++++++++++
 .../NeonConvolution2dBaseWorkload.hpp | 49 +++++++
 .../NeonConvolution2dFloatWorkload.cpp | 40 ++++++
 .../NeonConvolution2dFloatWorkload.hpp | 29 ++++
 .../NeonConvolution2dUint8Workload.cpp | 35 +++++
 .../NeonConvolution2dUint8Workload.hpp | 29 ++++
 .../NeonDepthwiseConvolutionBaseWorkload.cpp | 49 +++++++
 .../NeonDepthwiseConvolutionBaseWorkload.hpp | 21 +++
 .../NeonDepthwiseConvolutionFloatWorkload.cpp | 94 +++++++++++++
 .../NeonDepthwiseConvolutionFloatWorkload.hpp | 33 +++++
 .../NeonDepthwiseConvolutionUint8Workload.cpp | 94 +++++++++++++
 .../NeonDepthwiseConvolutionUint8Workload.hpp | 29 ++++
 .../NeonWorkloads/NeonFloorFloatWorkload.cpp | 30 +++++
 .../NeonWorkloads/NeonFloorFloatWorkload.hpp | 27 ++++
 .../NeonFullyConnectedFloatWorkload.cpp | 96 ++++++++++++++
 .../NeonFullyConnectedFloatWorkload.hpp | 40 ++++++
 .../NeonL2NormalizationFloatWorkload.cpp | 42 ++++++
 .../NeonL2NormalizationFloatWorkload.hpp | 33 +++++
 .../NeonWorkloads/NeonLstmFloatWorkload.cpp | 22 ++++
 .../NeonWorkloads/NeonLstmFloatWorkload.hpp | 20 +++
 .../NeonWorkloads/NeonMergerFloatWorkload.cpp | 17 +++
 .../NeonWorkloads/NeonMergerFloatWorkload.hpp | 20 +++
 .../NeonWorkloads/NeonMergerUint8Workload.cpp | 17 +++
 .../NeonWorkloads/NeonMergerUint8Workload.hpp | 20 +++
 .../NeonMultiplicationFloatWorkload.cpp | 60 +++++++++
 .../NeonMultiplicationFloatWorkload.hpp | 30 +++++
 .../NeonNormalizationFloatWorkload.cpp | 70 ++++++++++
 .../NeonNormalizationFloatWorkload.hpp | 34 +++++
 src/backends/NeonWorkloads/NeonPermuteWorkload.cpp | 54 ++++++++
 src/backends/NeonWorkloads/NeonPermuteWorkload.hpp | 43 ++++++
 .../NeonWorkloads/NeonPooling2dBaseWorkload.cpp | 47 +++++++
 .../NeonWorkloads/NeonPooling2dBaseWorkload.hpp | 37 ++++++
 .../NeonWorkloads/NeonPooling2dFloatWorkload.cpp | 27 ++++
 .../NeonWorkloads/NeonPooling2dFloatWorkload.hpp | 25 ++++
 .../NeonWorkloads/NeonPooling2dUint8Workload.cpp | 26 ++++
 .../NeonWorkloads/NeonPooling2dUint8Workload.hpp | 25 ++++
 .../NeonWorkloads/NeonReshapeFloatWorkload.cpp | 32 +++++
 .../NeonWorkloads/NeonReshapeFloatWorkload.hpp | 29 ++++
 .../NeonWorkloads/NeonReshapeUint8Workload.cpp | 30 +++++
 .../NeonWorkloads/NeonReshapeUint8Workload.hpp | 27 ++++
 .../NeonWorkloads/NeonSoftmaxBaseWorkload.cpp | 30 +++++
 .../NeonWorkloads/NeonSoftmaxBaseWorkload.hpp | 17 +++
 .../NeonWorkloads/NeonSoftmaxFloatWorkload.cpp | 32 +++++
 .../NeonWorkloads/NeonSoftmaxFloatWorkload.hpp | 29 ++++
 .../NeonWorkloads/NeonSoftmaxUint8Workload.cpp | 41 ++++++
 .../NeonWorkloads/NeonSoftmaxUint8Workload.hpp | 27 ++++
 .../NeonWorkloads/NeonSplitterFloatWorkload.cpp | 17 +++
 .../NeonWorkloads/NeonSplitterFloatWorkload.hpp | 20 +++
 .../NeonWorkloads/NeonSplitterUint8Workload.cpp | 17 +++
 .../NeonWorkloads/NeonSplitterUint8Workload.hpp | 20 +++
 .../NeonWorkloads/NeonSubtractionFloatWorkload.cpp | 46 +++++++
 .../NeonWorkloads/NeonSubtractionFloatWorkload.hpp | 27 ++++
 src/backends/NeonWorkloads/backend.cmake | 9 ++
 73 files changed, 2754 insertions(+)
 create mode 100644 src/backends/NeonWorkloads/CMakeLists.txt
 create mode 100644 src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonPermuteWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonPermuteWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
 create mode 100644 src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp
 create mode 100644 src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp
 create mode 100644 src/backends/NeonWorkloads/backend.cmake

(limited to 'src/backends/NeonWorkloads')

diff --git a/src/backends/NeonWorkloads/CMakeLists.txt b/src/backends/NeonWorkloads/CMakeLists.txt
new file mode 100644
index 0000000000..02cb53dff1
--- /dev/null
+++ b/src/backends/NeonWorkloads/CMakeLists.txt
@@ -0,0 +1,83 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+list(APPEND armnnNeonBackend_sources
+    NeonActivationFloatWorkload.cpp
+    NeonActivationFloatWorkload.hpp
+    NeonActivationUint8Workload.cpp
+    NeonActivationUint8Workload.hpp
+    NeonAdditionFloatWorkload.cpp
+    NeonAdditionFloatWorkload.hpp
+    NeonBaseConstantWorkload.hpp
+    NeonBaseMergerWorkload.hpp
+    NeonBaseSplitterWorkload.hpp
+    NeonBatchNormalizationFloatWorkload.cpp
+    NeonBatchNormalizationFloatWorkload.hpp
+    NeonConstantFloatWorkload.cpp
+    NeonConstantFloatWorkload.hpp
+    NeonConstantUint8Workload.cpp
+    NeonConstantUint8Workload.hpp
+    NeonConvertFp16ToFp32Workload.cpp
+    NeonConvertFp16ToFp32Workload.hpp
+    NeonConvertFp32ToFp16Workload.cpp
+    NeonConvertFp32ToFp16Workload.hpp
+    NeonConvolution2dBaseWorkload.cpp
+    NeonConvolution2dBaseWorkload.hpp
+    NeonConvolution2dFloatWorkload.cpp
+    NeonConvolution2dFloatWorkload.hpp
+    NeonConvolution2dUint8Workload.cpp
+    NeonConvolution2dUint8Workload.hpp
+    NeonDepthwiseConvolutionBaseWorkload.cpp
+    NeonDepthwiseConvolutionBaseWorkload.hpp
+    NeonDepthwiseConvolutionFloatWorkload.cpp
+    NeonDepthwiseConvolutionFloatWorkload.hpp
+    NeonDepthwiseConvolutionUint8Workload.cpp
+    NeonDepthwiseConvolutionUint8Workload.hpp
+    NeonFloorFloatWorkload.cpp
+    NeonFloorFloatWorkload.hpp
+    NeonFullyConnectedFloatWorkload.cpp
+    NeonFullyConnectedFloatWorkload.hpp
+    NeonL2NormalizationFloatWorkload.cpp
+    NeonL2NormalizationFloatWorkload.hpp
+    NeonLstmFloatWorkload.cpp
+    NeonLstmFloatWorkload.hpp
+    NeonMergerFloatWorkload.cpp
+    NeonMergerFloatWorkload.hpp
+    NeonMergerUint8Workload.cpp
+    NeonMergerUint8Workload.hpp
+    NeonMultiplicationFloatWorkload.cpp
+    NeonMultiplicationFloatWorkload.hpp
+    NeonNormalizationFloatWorkload.cpp
+    NeonNormalizationFloatWorkload.hpp
+    NeonPermuteWorkload.cpp
+    NeonPermuteWorkload.hpp
+    NeonPooling2dBaseWorkload.cpp
+    NeonPooling2dBaseWorkload.hpp
+    NeonPooling2dFloatWorkload.cpp
+    NeonPooling2dFloatWorkload.hpp
+    NeonPooling2dUint8Workload.cpp
+    NeonPooling2dUint8Workload.hpp
+    NeonReshapeFloatWorkload.cpp
+    NeonReshapeFloatWorkload.hpp
+    NeonReshapeUint8Workload.cpp
+    NeonReshapeUint8Workload.hpp
+    NeonSoftmaxBaseWorkload.cpp
+    NeonSoftmaxBaseWorkload.hpp
+    NeonSoftmaxFloatWorkload.cpp
+    NeonSoftmaxFloatWorkload.hpp
+    NeonSoftmaxUint8Workload.cpp
+    NeonSoftmaxUint8Workload.hpp
+    NeonSplitterFloatWorkload.cpp
+    NeonSplitterFloatWorkload.hpp
+    NeonSplitterUint8Workload.cpp
+    NeonSplitterUint8Workload.hpp
+    NeonSubtractionFloatWorkload.cpp
+    NeonSubtractionFloatWorkload.hpp
+)
+
+add_library(armnnNeonBackend STATIC ${armnnNeonBackend_sources})
+target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src)
+target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
+target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
diff --git a/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp
new file mode 100644
index 0000000000..bedf3dcb02
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.cpp
@@ -0,0 +1,57 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationFloatWorkload.hpp" +#include "backends/ArmComputeUtils.hpp" + + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "Neon: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::NEActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +NeonActivationFloatWorkload::NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void NeonActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp new file mode 100644 index 0000000000..f8d25ca47d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +class NeonActivationFloatWorkload : public FloatWorkload +{ +public: + NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp b/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp new file mode 100644 index 0000000000..a9b94d2916 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
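+// A hypothetical caller's view of the NeonActivationWorkloadValidate function
+// above (the names in this sketch are assumptions, not part of the patch):
+//     armnn::ActivationDescriptor desc;
+//     desc.m_Function = armnn::ActivationFunction::Sigmoid;   // maps to ACL LOGISTIC
+//     arm_compute::Status status = armnn::NeonActivationWorkloadValidate(input, output, desc);
+//     // For QuantisedAsymm8 input, status.error_code() != arm_compute::ErrorCode::OK,
+//     // so the runtime can reject the layer before any workload is constructed.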
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationUint8Workload.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void NeonActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp b/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp new file mode 100644 index 0000000000..405e600691 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonActivationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonActivationUint8Workload : public Uint8Workload +{ +public: + NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp new file mode 100644 index 0000000000..adc34e91c4 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonAdditionFloatWorkload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticAddition::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + + +NeonAdditionFloatWorkload::NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonAdditionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonAdditionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloatWorkload_Execute"); + m_AddLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp new file mode 100644 index 0000000000..154e4f33d1 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonAdditionFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonAdditionFloatWorkload : public FloatWorkload +{ +public: + NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticAddition m_AddLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp new file mode 100644 index 0000000000..f4a09d4aed --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseConstantWorkload.hpp @@ -0,0 +1,83 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "Half.hpp" + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template +class NeonBaseConstantWorkload : public TypedWorkload +{ +public: + NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override + { + using namespace armcomputetensorutils; + + // The intermediate tensor held by the corresponding layer output handler can be initialised with the + // given data on the first inference, then reused for subsequent inferences. 
+ // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer + // may not have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::ITensor& output = + boost::polymorphic_downcast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = + boost::polymorphic_downcast(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } + } + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp new file mode 100644 index 0000000000..603e7f3544 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseMergerWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ +// Base class template providing an implementation of the Merger layer common to all data types. +template +class NeonBaseMergerWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp b/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..9288d4427e --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBaseSplitterWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template +class NeonBaseSplitterWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, splitter is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..6f5c948084 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "armnn/ArmNN.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + descriptor.m_Eps); +} + +NeonBatchNormalizationFloatWorkload::NeonBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void NeonBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void NeonBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn + + diff --git a/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..7982541748 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
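+// The configure/initialise/prepare/free sequence used in the .cpp above recurs in
+// most workloads of this patch; in sketch form (member names as in that file):
+//     m_Layer.configure(&input, &output, m_Mean.get(), m_Variance.get(),
+//                       m_Beta.get(), m_Gamma.get(), m_Data.m_Parameters.m_Eps);
+//     InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean);  // etc.
+//     m_Layer.prepare();       // ACL copies/reshapes the parameters internally
+//     FreeUnusedTensors();     // the armnn-side staging tensors can then be released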
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor); + +class NeonBatchNormalizationFloatWorkload : public FloatWorkload +{ +public: + NeonBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEBatchNormalizationLayer m_Layer; + + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp new file mode 100644 index 0000000000..dbdd057101 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantFloatWorkload.hpp" + +namespace armnn +{ + +void NeonConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloatWorkload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp new file mode 100644 index 0000000000..c35b5fda3e --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantFloatWorkload : public NeonBaseConstantWorkload +{ +public: + using NeonBaseConstantWorkload::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp b/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp new file mode 100644 index 0000000000..c607d86844 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantUint8Workload.hpp" + +namespace armnn +{ + +void NeonConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp b/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp new file mode 100644 index 0000000000..2cb9516afe --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
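+// The float and uint8 constant workloads above add only a named profiling scope:
+// ARMNN_SCOPED_PROFILING_EVENT_NEON times the enclosing Execute() call under the
+// given label, then the shared NeonBaseConstantWorkload::Execute() does the work.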
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantUint8Workload : public NeonBaseConstantWorkload +{ +public: + using NeonBaseConstantWorkload::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..86ec31c71d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..d70401b5f2 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp16ToFp32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..0f4fbe4e93 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
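+// Worked example of the size arithmetic in these conversion workloads: a tensor of
+// eight half-precision values occupies 16 bytes, so numElements = 16/2 = 8. That
+// both directions divide by 2 suggests the byte count handed to the lambda is that
+// of the fp16-side buffer (an inference from the code, not stated in the patch).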
+// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "Profiling.hpp" +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32ToFloat16Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast(src); + auto output = reinterpret_cast(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..eb839fdd9d --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvertFp32ToFp16Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" +#include "backends/NeonWorkloadUtils.hpp" + +namespace armnn +{ + +class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair; + std::vector m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..0e9894ce78 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.cpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
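+// Pattern note for the validate function below: the bias TensorInfo arrives as a
+// boost::optional and is translated into a nullable ACL pointer, so one signature
+// covers both biased and bias-free convolutions; Compute Library's validate()
+// treats a null bias info as "no bias".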
+// SPDX-License-Identifier: MIT +// + +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include "armnn/Types.hpp" +#include "Half.hpp" + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::NEConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +template +NeonConvolution2dBaseWorkload::NeonConvolution2dBaseWorkload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : TypedWorkload(descriptor, info) +{ + using arm_compute::NEDirectConvolutionLayer; + + ValidateData(); + + // todo: check tensor shapes match. + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + const bool preferDirectConvolution = + IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), + m_Data.m_Parameters); + + if (preferDirectConvolution) + { + auto directConvolutionLayer = std::make_unique(memoryManager); + directConvolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(directConvolutionLayer.release()); + } + else + { + auto convolutionLayer = std::make_unique(memoryManager); + convolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(convolutionLayer.release()); + } + BOOST_ASSERT(m_ConvolutionLayer); + + armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); + + switch (dataType) + { + case DataType::Float16: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::Float32: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + case DataType::QuantisedAsymm8: + { + 
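+            // QuantisedAsymm8 weights are stored as raw uint8 bytes (the scale and
+            // zero-point travel in the TensorInfo), so they are copied verbatim.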
InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown DataType."); + } + } +} + +template +void NeonConvolution2dBaseWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generates known implementations for linker. +template class NeonConvolution2dBaseWorkload; +template class NeonConvolution2dBaseWorkload; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..77d90cd84b --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dBaseWorkload.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/NeonWorkloadUtils.hpp" +#include "backends/Workload.hpp" + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +#include + +namespace armnn +{ + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +template +class NeonConvolution2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + virtual void ValidateData() const {}; + +protected: + std::unique_ptr m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..ca7a0c575a --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
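+// The explicit instantiations above pin down the template for the data types the
+// two derived workloads use (argument lists inferred from those classes):
+// Float16/Float32 for NeonConvolution2dFloatWorkload and QuantisedAsymm8 for
+// NeonConvolution2dUint8Workload, so the definitions in this translation unit
+// satisfy the linker without a header-only implementation.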
+// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dFloatWorkload.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/NeonLayerSupport.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonConvolution2dFloatWorkload::NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloatWorkload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dFloatWorkload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dFloatWorkload", 1, 1); +} + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..dd8ef55f43 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" +#include + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include + +namespace armnn +{ + +class NeonConvolution2dFloatWorkload : public NeonConvolution2dBaseWorkload +{ +public: + NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + + void Execute() const override; + void ValidateData() const override; +}; + +} //namespace armnn + diff --git a/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..5affe682b4 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dUint8Workload.hpp" + +namespace armnn +{ + +NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dUint8Workload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..ef60fc3e84 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonConvolution2dUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonConvolution2dBaseWorkload.hpp"
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+
+class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<armnn::DataType::QuantisedAsymm8>
+{
+public:
+    NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                   std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+
+    virtual void ValidateData() const override;
+    virtual void Execute() const override;
+private:
+};
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
new file mode 100644
index 0000000000..e79e14f2ed
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.cpp
@@ -0,0 +1,49 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonDepthwiseConvolutionBaseWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const DepthwiseConvolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases)
+{
+    const arm_compute::TensorInfo aclInputInfo =
+        armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo =
+        armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    const arm_compute::TensorInfo aclWeightsInfo =
+        armcomputetensorutils::BuildArmComputeTensorInfo(weights);
+
+    arm_compute::TensorInfo aclBiasesInfo;
+    arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+
+    if (descriptor.m_BiasEnabled)
+    {
+        BOOST_ASSERT(biases.is_initialized());
+
+        aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.get());
+        optionalAclBiasesInfo = &aclBiasesInfo;
+    }
+
+    const arm_compute::PadStrideInfo aclPadStrideInfo =
+        armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor);
+    const unsigned int aclDepthMultiplier = weights.GetShape()[0];
+
+    return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
+                                                              &aclWeightsInfo,
+                                                              optionalAclBiasesInfo,
+                                                              &aclOutputInfo,
+                                                              aclPadStrideInfo,
+                                                              aclDepthMultiplier);
+}
+
+}
\ No newline at end of file
diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
new file mode 100644
index 0000000000..eec432be86
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/NeonWorkloadUtils.hpp"
+
+#include <boost/optional.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input,
+    const TensorInfo& output,
+    const DepthwiseConvolution2dDescriptor& descriptor,
+    const TensorInfo& weights,
+    const boost::optional<TensorInfo>& biases);
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp
new file mode 100644
index 0000000000..1ec1417a58
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.cpp
@@ -0,0 +1,94 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4ec8c1dc37 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
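+// On the use3x3Optimisation test above: weight shape indices 2 and 3 are the
+// kernel height and width under the [M, C, H, W] depthwise weight layout assumed
+// here (index 0 is the depth multiplier, as the base validate function uses), so
+// the check routes 3x3 kernels to ACL's specialised, faster 3x3 implementation.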
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload +{ +public: + NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr m_pDepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..b7813a59c5 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,94 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "backends/NeonLayerSupport.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique(); + static_cast( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor()); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git 
a/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..a0be512f9b --- /dev/null +++ b/src/backends/NeonWorkloads/NeonDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload +{ +public: + NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr m_pDepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp new file mode 100644 index 0000000000..a08ba8a6ec --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFloorFloatWorkload.hpp" + +namespace armnn +{ +NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); + m_Layer.run(); +} +} //namespace armnn + + + diff --git a/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp new file mode 100644 index 0000000000..ad9f44bbf9 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFloorFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include + +namespace armnn +{ + +class NeonFloorFloatWorkload : public FloatWorkload +{ +public: + NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp new file mode 100644 index 0000000000..2036ecb203 --- /dev/null +++ b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonFullyConnectedFloatWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
+                                                       const TensorInfo& output,
+                                                       const TensorInfo& weights,
+                                                       const TensorInfo& biases,
+                                                       const FullyConnectedDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
+    const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
+
+    arm_compute::TensorInfo aclBiases;
+    arm_compute::TensorInfo *optionalAclBiases = nullptr;
+    if (descriptor.m_BiasEnabled)
+    {
+        aclBiases = BuildArmComputeTensorInfo(biases);
+        optionalAclBiases = &aclBiases;
+    }
+
+    const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
+
+
+    return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
+                                                        &aclWeights,
+                                                        optionalAclBiases,
+                                                        &aclOutput,
+                                                        fullyConnectedLayerInfo);
+}
+
+NeonFullyConnectedFloatWorkload::NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor,
+    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : FloatWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
+    , m_FullyConnectedLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("NeonFullyConnectedFloatWorkload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_WeightsTensor = std::make_unique<arm_compute::Tensor>();
+    BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
+
+    if (m_Data.m_Parameters.m_BiasEnabled)
+    {
+        m_BiasesTensor = std::make_unique<arm_compute::Tensor>();
+        BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
+    }
+
+    // Construct
+    arm_compute::FullyConnectedLayerInfo fc_info;
+    fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+    m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
+
+    // Allocate
+    InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
+
+    if (m_BiasesTensor)
+    {
+        InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias);
+    }
+
+    // Force Compute Library to perform the necessary copying and reshaping, after which
+    // delete all the input tensors that will no longer be needed.
+    m_FullyConnectedLayer.prepare();
+    FreeUnusedTensors();
+}
+
+void NeonFullyConnectedFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedFloatWorkload_Execute");
+    m_FullyConnectedLayer.run();
+}
+
+void NeonFullyConnectedFloatWorkload::FreeUnusedTensors()
+{
+    FreeTensorIfUnused(m_WeightsTensor);
+    FreeTensorIfUnused(m_BiasesTensor);
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp
new file mode 100644
index 0000000000..27e5717b04
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonFullyConnectedFloatWorkload.hpp
@@ -0,0 +1,40 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+
+arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
+                                                       const TensorInfo& output,
+                                                       const TensorInfo& weights,
+                                                       const TensorInfo& biases,
+                                                       const FullyConnectedDescriptor& descriptor);
+
+class NeonFullyConnectedFloatWorkload : public FloatWorkload<FullyConnectedQueueDescriptor>
+{
+public:
+    NeonFullyConnectedFloatWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer;
+
+    std::unique_ptr<arm_compute::Tensor> m_WeightsTensor;
+    std::unique_ptr<arm_compute::Tensor> m_BiasesTensor;
+
+    void FreeUnusedTensors();
+};
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..7296e67179
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.cpp
@@ -0,0 +1,44 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonL2NormalizationFloatWorkload.hpp"
+#include "backends/ArmComputeUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::NormalizationLayerInfo normalizationInfo =
+        CreateAclNormalizationLayerInfoForL2Normalization(input);
+
+    return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
+NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
+    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
+    , m_Layer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
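+    // There is no dedicated NEL2Normalize function here; NENormalizationLayer is configured with a
+    // normalization descriptor that emulates L2 normalization across channels.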
+    m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0]));
+}
+
+void NeonL2NormalizationFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..078c4d140f
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonL2NormalizationFloatWorkload.hpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+
+arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input,
+                                                        const TensorInfo& output);
+
+class NeonL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor>
+{
+public:
+    NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    virtual void Execute() const override;
+
+private:
+    // Purposely not a NEL2Normalize function. See constructor.
+    mutable arm_compute::NENormalizationLayer m_Layer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp
new file mode 100644
index 0000000000..8b2b58d9b1
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.cpp
@@ -0,0 +1,22 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonLstmFloatWorkload.hpp"
+
+namespace armnn
+{
+NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor,
+                                             const WorkloadInfo& info)
+    : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonLstmFloatWorkload", 1, 1);
+}
+
+void NeonLstmFloatWorkload::Execute() const
+{
+    throw armnn::Exception("No implementation of Lstm in the Neon backend!");
+}
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp
new file mode 100644
index 0000000000..6064a017f9
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonLstmFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor>
+{
+public:
+    NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp
new file mode 100644
index 0000000000..79039aa51a
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonMergerFloatWorkload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloatWorkload_Execute");
+    NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp
new file mode 100644
index 0000000000..e7088b8c2f
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMergerFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerFloatWorkload : public NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+    using NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>::NeonBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp b/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
new file mode 100644
index 0000000000..3989702bd3
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMergerUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonMergerUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonMergerUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute");
+    NeonBaseMergerWorkload::Execute();
+}
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp b/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
new file mode 100644
index 0000000000..73c0fd55ad
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMergerUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseMergerWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp
new file mode 100644
index 0000000000..c4241ece19
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.cpp
@@ -0,0 +1,61 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonMultiplicationFloatWorkload.hpp"
+
+
+namespace armnn
+{
+
+arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
+                                                       const TensorInfo& input1,
+                                                       const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+    // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+    // ignored for F32 tensors.
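+    // ConvertPolicy::SATURATE clamps out-of-range results to the representable range instead of wrapping.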
+    return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1,
+                                                            &aclInput2,
+                                                            &aclOutput,
+                                                            1.0f,
+                                                            arm_compute::ConvertPolicy::SATURATE,
+                                                            arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor,
+                                                                 const WorkloadInfo& info)
+    : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonMultiplicationFloatWorkload", 2, 1);
+
+    arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
+    // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
+    // ignored for F32 tensors.
+    m_PixelWiseMultiplication.configure(&input1,
+                                        &input2,
+                                        &output,
+                                        1.0f,
+                                        arm_compute::ConvertPolicy::SATURATE,
+                                        arm_compute::RoundingPolicy::TO_ZERO);
+}
+
+void NeonMultiplicationFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute");
+    m_PixelWiseMultiplication.run();
+}
+
+} //namespace armnn
+
+
diff --git a/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp
new file mode 100644
index 0000000000..4b187b2d42
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonMultiplicationFloatWorkload.hpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
+                                                       const TensorInfo& input1,
+                                                       const TensorInfo& output);
+
+class NeonMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor>
+{
+public:
+    NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp
new file mode 100644
index 0000000000..4534c376d8
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.cpp
@@ -0,0 +1,71 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonNormalizationFloatWorkload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input,
+                                                      const TensorInfo& output,
+                                                      const NormalizationDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    arm_compute::NormalizationLayerInfo normalizationInfo =
+        armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor);
+
+    return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo);
+}
+
+NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
+                                                               const WorkloadInfo& info,
+                                                               std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
+    , m_NormalizationLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1);
+    std::string reasonIfUnsupported;
+    if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters))
+    {
+        throw UnimplementedException(reasonIfUnsupported);
+    }
+
+    // Input and output tensors must have the same dimensionality.
+    if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1]
+        || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0]
+        || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3]
+        || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2])
+    {
+        throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality.");
+    }
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
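+    // The trailing 'false' below is ACL's is_scaled flag; left false so alpha is applied unscaled.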
+    const arm_compute::NormType normType =
+        ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType);
+    arm_compute::NormalizationLayerInfo normalizationInfo(normType,
+                                                          m_Data.m_Parameters.m_NormSize,
+                                                          m_Data.m_Parameters.m_Alpha,
+                                                          m_Data.m_Parameters.m_Beta,
+                                                          m_Data.m_Parameters.m_K,
+                                                          false);
+
+    m_NormalizationLayer.configure(&input, &output, normalizationInfo);
+}
+
+void NeonNormalizationFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute");
+    m_NormalizationLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp
new file mode 100644
index 0000000000..633944ddc9
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonNormalizationFloatWorkload.hpp
@@ -0,0 +1,34 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+namespace armnn
+{
+
+arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input,
+                                                      const TensorInfo& output,
+                                                      const NormalizationDescriptor& descriptor);
+
+class NeonNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor>
+{
+public:
+    NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                   std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NENormalizationLayer m_NormalizationLayer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp b/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp
new file mode 100644
index 0000000000..9bc76ba853
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPermuteWorkload.cpp
@@ -0,0 +1,54 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonPermuteWorkload.hpp"
+#include "backends/NeonTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+#include <arm_compute/core/Error.h>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const PermuteDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+    const armnn::PermutationVector& mappings = descriptor.m_DimMappings;
+
+    return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo,
+                                            armcomputetensorutils::BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType DataType>
+NeonPermuteWorkload<DataType>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : TypedWorkload<PermuteQueueDescriptor, DataType>(descriptor, info)
+{
+    using armcomputetensorutils::BuildArmComputePermutationVector;
+
+    m_Data.ValidateInputsOutputs(GetName(), 1, 1);
+
+    const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+    const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
+
+    // Configure the permute function.
+    m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings));
+}
+
+template <armnn::DataType DataType>
+void NeonPermuteWorkload<DataType>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
+    m_PermuteFunction.run();
+}
+
+template class NeonPermuteWorkload<DataType::Float32>;
+template class NeonPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp b/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp
new file mode 100644
index 0000000000..1fe05b1645
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPermuteWorkload.hpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+#include "backends/WorkloadData.hpp"
+#include "backends/NeonWorkloadUtils.hpp"
+
+#include <armnn/TypesUtils.hpp>
+#include <arm_compute/runtime/NEON/functions/NEPermute.h>
+
+#include <string>
+
+namespace armnn
+{
+arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output,
+                                                const PermuteDescriptor& descriptor);
+
+template <armnn::DataType DataType>
+class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataType>
+{
+public:
+    static const std::string& GetName()
+    {
+        static const std::string name = std::string("NeonPermuteWorkload");
+        return name;
+    }
+
+    NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+
+private:
+    using TypedWorkload<PermuteQueueDescriptor, DataType>::m_Data;
+    mutable arm_compute::NEPermute m_PermuteFunction;
+};
+
+using NeonPermuteFloatWorkload = NeonPermuteWorkload<DataType::Float32>;
+using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>;
+
+} // namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
new file mode 100644
index 0000000000..208d08c4c5
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.cpp
@@ -0,0 +1,47 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonPooling2dBaseWorkload.hpp"
+#include "backends/NeonLayerSupport.hpp"
+#include "backends/NeonTensorHandle.hpp"
+#include "backends/ArmComputeUtils.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
+                                                  const TensorInfo& output,
+                                                  const Pooling2dDescriptor& descriptor)
+{
+    const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor);
+
+    return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo);
+}
+
+template <armnn::DataType... dataTypes>
+NeonPooling2dBaseWorkload<dataTypes...>::NeonPooling2dBaseWorkload(
+    const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name)
+    : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs(name, 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters);
+
+    m_PoolingLayer.configure(&input, &output, layerInfo);
+}
+
+template class NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>;
+template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>;
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
new file mode 100644
index 0000000000..77d6bf2f06
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dBaseWorkload.hpp
@@ -0,0 +1,38 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input,
+                                                  const TensorInfo& output,
+                                                  const Pooling2dDescriptor& descriptor);
+
+// Base class template providing an implementation of the Pooling2d layer common to all data types.
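+// The derived NeonPooling2dFloatWorkload and NeonPooling2dUint8Workload implement Execute() and run m_PoolingLayer.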
+template <armnn::DataType... dataTypes>
+class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>
+{
+public:
+    using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data;
+
+    NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info,
+                              const std::string& name);
+
+protected:
+    mutable arm_compute::NEPoolingLayer m_PoolingLayer;
+};
+
+
+} //namespace armnn
+
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp
new file mode 100644
index 0000000000..46996b088c
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.cpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonPooling2dFloatWorkload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonPooling2dFloatWorkload::NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info)
+    : NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>(descriptor, info,
+                                                                      "NeonPooling2dFloatWorkload")
+{
+}
+
+void NeonPooling2dFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloatWorkload_Execute");
+    m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp
new file mode 100644
index 0000000000..78a35748bb
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dFloatWorkload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+#include "NeonPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonPooling2dFloatWorkload : public NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+    NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
new file mode 100644
index 0000000000..8f99a2be86
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.cpp
@@ -0,0 +1,26 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonPooling2dUint8Workload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor,
+                                                       const WorkloadInfo& info)
+    : NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload")
+{
+}
+
+void NeonPooling2dUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute");
+    m_PoolingLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp
new file mode 100644
index 0000000000..d475c5f721
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonPooling2dUint8Workload.hpp
@@ -0,0 +1,25 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+#include "NeonPooling2dBaseWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp
new file mode 100644
index 0000000000..2dae9466bb
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.cpp
@@ -0,0 +1,32 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonReshapeFloatWorkload.hpp"
+
+
+
+namespace armnn
+{
+
+NeonReshapeFloatWorkload::NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonReshapeFloatWorkload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output);
+}
+
+void NeonReshapeFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloatWorkload_Execute");
+    m_Layer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp
new file mode 100644
index 0000000000..066765adeb
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonReshapeFloatWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor>
+{
+public:
+    NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NEReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
new file mode 100644
index 0000000000..41aa07fe49
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonReshapeUint8Workload.hpp"
+
+
+
+
+namespace armnn
+{
+NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info)
+    : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_Layer.configure(&input, &output);
+}
+
+void NeonReshapeUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute");
+    m_Layer.run();
+}
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
new file mode 100644
index 0000000000..3f7c470323
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonReshapeUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor>
+{
+public:
+    NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NEReshapeLayer m_Layer;
+};
+
+} //namespace armnn
+
+
+
+
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
new file mode 100644
index 0000000000..ca9e4f058d
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.cpp
@@ -0,0 +1,30 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSoftmaxBaseWorkload.hpp"
+
+#include "backends/ArmComputeTensorUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const SoftmaxDescriptor& descriptor)
+{
+    // NOTE: We report 4D Softmax as unsupported until full support is added to ACL.
+    if(input.GetShape().GetNumDimensions() >= 4u)
+    {
+        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported");
+    }
+
+    const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta);
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
new file mode 100644
index 0000000000..24910df7c7
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxBaseWorkload.hpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/NeonWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input,
+                                                const TensorInfo& output,
+                                                const SoftmaxDescriptor& descriptor);
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp
new file mode 100644
index 0000000000..92e5139c1a
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.cpp
@@ -0,0 +1,33 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSoftmaxFloatWorkload.hpp"
+
+namespace armnn
+{
+
+NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor,
+    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info)
+    , m_SoftmaxLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1);
+
+    // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
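+    // m_Beta scales the inputs before the exponentiation; standard softmax corresponds to beta == 1.0f.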
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta);
+}
+
+void NeonSoftmaxFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute");
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp
new file mode 100644
index 0000000000..47745c658f
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxFloatWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+
+class NeonSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor>
+{
+public:
+    NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                             std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..cff869c9b7
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSoftmaxUint8Workload.hpp"
+
+namespace armnn
+{
+
+NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor,
+                                                   const WorkloadInfo& info,
+                                                   std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+    , m_SoftmaxLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1);
+
+    arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
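+    // Softmax outputs lie in [0, 1], so the quantised output must use scale 1/256 with a zero offset
+    // to map that range exactly onto the 8-bit representation.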
+    const auto outputQuantization = output.info()->quantization_info();
+
+    if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+    {
+        throw InvalidArgumentException(
+            "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+    }
+
+    m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void NeonSoftmaxUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute");
+
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..f894c5a958
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSoftmaxUint8Workload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+namespace armnn
+{
+
+class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+    NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                             std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp
new file mode 100644
index 0000000000..39ed5b7cbc
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSplitterFloatWorkload.hpp"
+
+namespace armnn
+{
+
+void NeonSplitterFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloatWorkload_Execute");
+    NeonBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp
new file mode 100644
index 0000000000..744a4fe216
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSplitterFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonSplitterFloatWorkload : public NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+    using NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>::NeonBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..4b2cf8fc91
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.cpp
@@ -0,0 +1,17 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSplitterUint8Workload.hpp"
+
+namespace armnn
+{
+
+void NeonSplitterUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute");
+    NeonBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..f219cfaa7d
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSplitterUint8Workload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "NeonBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp
new file mode 100644
index 0000000000..3f37d82d22
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.cpp
@@ -0,0 +1,46 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonSubtractionFloatWorkload.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+namespace armnn
+{
+
+arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
+                                                    const TensorInfo& input1,
+                                                    const TensorInfo& output)
+{
+    const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+    return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
+                                                          &aclInput1,
+                                                          &aclOutput,
+                                                          arm_compute::ConvertPolicy::SATURATE);
+}
+
+NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info)
+    : FloatWorkload<SubtractionQueueDescriptor>(descriptor, info)
+{
+    m_Data.ValidateInputsOutputs("NeonSubtractionFloatWorkload", 2, 1);
+
+    arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+}
+
+void NeonSubtractionFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute");
+    m_SubLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp
new file mode 100644
index 0000000000..18988a35ca
--- /dev/null
+++ b/src/backends/NeonWorkloads/NeonSubtractionFloatWorkload.hpp
@@ -0,0 +1,27 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <backends/NeonWorkloadUtils.hpp>
+
+namespace armnn
+{
+
+arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
+                                                    const TensorInfo& input1,
+                                                    const TensorInfo& output);
+
+class NeonSubtractionFloatWorkload : public FloatWorkload<SubtractionQueueDescriptor>
+{
+public:
+    NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+    virtual void Execute() const override;
+
+private:
+    mutable arm_compute::NEArithmeticSubtraction m_SubLayer;
+};
+
+} //namespace armnn
diff --git a/src/backends/NeonWorkloads/backend.cmake b/src/backends/NeonWorkloads/backend.cmake
new file mode 100644
index 0000000000..f0908c5496
--- /dev/null
+++ b/src/backends/NeonWorkloads/backend.cmake
@@ -0,0 +1,9 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+if(ARMCOMPUTENEON)
+    add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/NeonWorkloads)
+    list(APPEND armnnLibraries armnnNeonBackend)
+endif()
-- 
cgit v1.2.1