diff options
author | David Beck <david.beck@arm.com> | 2018-09-24 15:59:27 +0100 |
---|---|---|
committer | Matthew Bentham <matthew.bentham@arm.com> | 2018-10-10 16:16:57 +0100 |
commit | 0dbe0ee25312b728d77383d11c465156e64ae757 (patch) | |
tree | af37a9802e3ad551e1bf63f7636508cde7a41643 /src/backends/neon/workloads | |
parent | b4540bef0b0327683fe8e63f727c1212800dc2a9 (diff) | |
download | armnn-0dbe0ee25312b728d77383d11c465156e64ae757.tar.gz |
IVGCVSW-1899 : Neon backend folder structure
armnn:149855
Change-Id: I26e8cf83422a65049386a5ebdb6d0001627aefaa
Diffstat (limited to 'src/backends/neon/workloads')
75 files changed, 2890 insertions, 0 deletions
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt new file mode 100644 index 0000000000..850c65cb4e --- /dev/null +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -0,0 +1,86 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +list(APPEND armnnNeonBackendWorkloads_sources + NeonActivationFloatWorkload.cpp + NeonActivationFloatWorkload.hpp + NeonActivationUint8Workload.cpp + NeonActivationUint8Workload.hpp + NeonAdditionFloatWorkload.cpp + NeonAdditionFloatWorkload.hpp + NeonBaseConstantWorkload.hpp + NeonBaseMergerWorkload.hpp + NeonBaseSplitterWorkload.hpp + NeonBatchNormalizationFloatWorkload.cpp + NeonBatchNormalizationFloatWorkload.hpp + NeonConstantFloatWorkload.cpp + NeonConstantFloatWorkload.hpp + NeonConstantUint8Workload.cpp + NeonConstantUint8Workload.hpp + NeonConvertFp16ToFp32Workload.cpp + NeonConvertFp16ToFp32Workload.hpp + NeonConvertFp32ToFp16Workload.cpp + NeonConvertFp32ToFp16Workload.hpp + NeonConvolution2dBaseWorkload.cpp + NeonConvolution2dBaseWorkload.hpp + NeonConvolution2dFloatWorkload.cpp + NeonConvolution2dFloatWorkload.hpp + NeonConvolution2dUint8Workload.cpp + NeonConvolution2dUint8Workload.hpp + NeonDepthwiseConvolutionBaseWorkload.cpp + NeonDepthwiseConvolutionBaseWorkload.hpp + NeonDepthwiseConvolutionFloatWorkload.cpp + NeonDepthwiseConvolutionFloatWorkload.hpp + NeonDepthwiseConvolutionUint8Workload.cpp + NeonDepthwiseConvolutionUint8Workload.hpp + NeonFloorFloatWorkload.cpp + NeonFloorFloatWorkload.hpp + NeonFullyConnectedWorkload.cpp + NeonFullyConnectedWorkload.hpp + NeonL2NormalizationFloatWorkload.cpp + NeonL2NormalizationFloatWorkload.hpp + NeonLstmFloatWorkload.cpp + NeonLstmFloatWorkload.hpp + NeonMergerFloatWorkload.cpp + NeonMergerFloatWorkload.hpp + NeonMergerUint8Workload.cpp + NeonMergerUint8Workload.hpp + NeonMultiplicationFloatWorkload.cpp + NeonMultiplicationFloatWorkload.hpp + NeonNormalizationFloatWorkload.cpp + NeonNormalizationFloatWorkload.hpp + NeonPermuteWorkload.cpp + NeonPermuteWorkload.hpp + NeonPooling2dBaseWorkload.cpp + NeonPooling2dBaseWorkload.hpp + NeonPooling2dFloatWorkload.cpp + NeonPooling2dFloatWorkload.hpp + NeonPooling2dUint8Workload.cpp + NeonPooling2dUint8Workload.hpp + NeonReshapeFloatWorkload.cpp + NeonReshapeFloatWorkload.hpp + NeonReshapeUint8Workload.cpp + NeonReshapeUint8Workload.hpp + NeonSoftmaxBaseWorkload.cpp + NeonSoftmaxBaseWorkload.hpp + NeonSoftmaxFloatWorkload.cpp + NeonSoftmaxFloatWorkload.hpp + NeonSoftmaxUint8Workload.cpp + NeonSoftmaxUint8Workload.hpp + NeonSplitterFloatWorkload.cpp + NeonSplitterFloatWorkload.hpp + NeonSplitterUint8Workload.cpp + NeonSplitterUint8Workload.hpp + NeonSubtractionFloatWorkload.cpp + NeonSubtractionFloatWorkload.hpp + NeonWorkloads.hpp + NeonWorkloadUtils.cpp + NeonWorkloadUtils.hpp +) + +add_library(armnnNeonBackendWorkloads STATIC ${armnnNeonBackendWorkloads_sources}) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp b/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp new file mode 100644 index 0000000000..1d6bf70431 --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp @@ -0,0 +1,57 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonActivationFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> + + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "Neon: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::NEActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +NeonActivationFloatWorkload::NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ActivationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void NeonActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp b/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp new file mode 100644 index 0000000000..4d2f51fb4f --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +class NeonActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor> +{ +public: + NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonActivationUint8Workload.cpp b/src/backends/neon/workloads/NeonActivationUint8Workload.cpp new file mode 100644 index 0000000000..4aed6b510f --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonActivationUint8Workload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +namespace armnn +{ +NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<NeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<NeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void NeonActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonActivationUint8Workload.hpp b/src/backends/neon/workloads/NeonActivationUint8Workload.hpp new file mode 100644 index 0000000000..56e3544379 --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> +{ +public: + NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp b/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp new file mode 100644 index 0000000000..445e32ea44 --- /dev/null +++ b/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonAdditionFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticAddition::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + + +NeonAdditionFloatWorkload::NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<AdditionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonAdditionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonAdditionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloatWorkload_Execute"); + m_AddLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp b/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp new file mode 100644 index 0000000000..769492e949 --- /dev/null +++ b/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonAdditionFloatWorkload : public FloatWorkload<AdditionQueueDescriptor> +{ +public: + NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticAddition m_AddLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp b/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp new file mode 100644 index 0000000000..6bb275ac13 --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp @@ -0,0 +1,82 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <arm_compute/core/Types.h> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/Workload.hpp> +#include <Half.hpp> + +#include <boost/cast.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template <armnn::DataType... DataFormats> +class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormats...> +{ +public: + NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataFormats...>(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override + { + using namespace armcomputetensorutils; + + // The intermediate tensor held by the corresponding layer output handler can be initialised with the + // given data on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer + // may not have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::ITensor& output = + boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = + boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<Half>(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } + } + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp b/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp new file mode 100644 index 0000000000..9ff09f6c7c --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/Workload.hpp> + +namespace armnn +{ +// Base class template providing an implementation of the Merger layer common to all data types. +template <armnn::DataType... DataTypes> +class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...> +{ +public: + using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp b/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..dcee93363d --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template <armnn::DataType... DataTypes> +class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...> +{ +public: + using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, splitter is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..2383e78df3 --- /dev/null +++ b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <armnn/ArmNN.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + descriptor.m_Eps); +} + +NeonBatchNormalizationFloatWorkload::NeonBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Mean = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void NeonBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void NeonBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn + + diff --git a/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..59c7404c44 --- /dev/null +++ b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor); + +class NeonBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> +{ +public: + NeonBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEBatchNormalizationLayer m_Layer; + + std::unique_ptr<arm_compute::Tensor> m_Mean; + std::unique_ptr<arm_compute::Tensor> m_Variance; + std::unique_ptr<arm_compute::Tensor> m_Gamma; + std::unique_ptr<arm_compute::Tensor> m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp b/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp new file mode 100644 index 0000000000..dbdd057101 --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantFloatWorkload.hpp" + +namespace armnn +{ + +void NeonConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloatWorkload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp b/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp new file mode 100644 index 0000000000..c35b5fda3e --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantFloatWorkload : public NeonBaseConstantWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseConstantWorkload<DataType::Float16, DataType::Float32>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantUint8Workload.cpp b/src/backends/neon/workloads/NeonConstantUint8Workload.cpp new file mode 100644 index 0000000000..c607d86844 --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantUint8Workload.hpp" + +namespace armnn +{ + +void NeonConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantUint8Workload.hpp b/src/backends/neon/workloads/NeonConstantUint8Workload.hpp new file mode 100644 index 0000000000..2cb9516afe --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantUint8Workload : public NeonBaseConstantWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseConstantWorkload<DataType::QuantisedAsymm8>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..86ec31c71d --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast<const Half*>(src); + auto output = reinterpret_cast<float*>(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..dcf6998c64 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> +{ +public: + NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..0f4fbe4e93 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "Profiling.hpp" +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast<const float*>(src); + auto output = reinterpret_cast<Half*>(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..b819a8c542 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> +{ +public: + NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..547f563d59 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include <armnn/Types.hpp> +#include <Half.hpp> + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::NEConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +template<armnn::DataType... dataTypes> +NeonConvolution2dBaseWorkload<dataTypes...>::NeonConvolution2dBaseWorkload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>(descriptor, info) +{ + using arm_compute::NEDirectConvolutionLayer; + + ValidateData(); + + // todo: check tensor shapes match. + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo(), descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + const bool preferDirectConvolution = + IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), + m_Data.m_Parameters); + + if (preferDirectConvolution) + { + auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(memoryManager); + directConvolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(directConvolutionLayer.release()); + } + else + { + auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager); + convolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(convolutionLayer.release()); + } + BOOST_ASSERT(m_ConvolutionLayer); + + armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); + + switch (dataType) + { + case DataType::Float16: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Half>()); + break; + } + case DataType::Float32: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<float>()); + break; + } + case DataType::QuantisedAsymm8: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown DataType."); + } + } +} + +template<armnn::DataType... dataTypes> +void NeonConvolution2dBaseWorkload<dataTypes...>::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generates known implementations for linker. +template class NeonConvolution2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; +template class NeonConvolution2dBaseWorkload<armnn::DataType::QuantisedAsymm8>; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..6af89c1f01 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <boost/optional.hpp> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +template<armnn::DataType... dataTypes> +class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataTypes...> +{ +public: + using TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>::m_Data; + + NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + virtual void ValidateData() const {}; + +protected: + std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..cd26f8d536 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dFloatWorkload.hpp" +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonConvolution2dFloatWorkload::NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloatWorkload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dFloatWorkload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dFloatWorkload", 1, 1); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..14c77c8bd0 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include <memory> + +namespace armnn +{ + +class NeonConvolution2dFloatWorkload : public NeonConvolution2dBaseWorkload<DataType::Float16, DataType::Float32> +{ +public: + NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + void Execute() const override; + void ValidateData() const override; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..5affe682b4 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dUint8Workload.hpp" + +namespace armnn +{ + +NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>()); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dUint8Workload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..ef60fc3e84 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include <memory> + +namespace armnn +{ + +class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8> +{ +public: + NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + virtual void ValidateData() const override; + virtual void Execute() const override; +private: +}; + +} //namespace armnnn + diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..ef60b3238d --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionBaseWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +}
\ No newline at end of file diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..982992a363 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <boost/optional.hpp> + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..742a768b94 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..0109ea10cb --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..722b778eba --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + + if (m_BiasTensor) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..90cf8b0091 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp new file mode 100644 index 0000000000..a08ba8a6ec --- /dev/null +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFloorFloatWorkload.hpp" + +namespace armnn +{ +NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<FloorQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); + m_Layer.run(); +} +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp new file mode 100644 index 0000000000..478aa94ca4 --- /dev/null +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> +{ +public: + NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..8cebb4f48f --- /dev/null +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp @@ -0,0 +1,110 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFullyConnectedWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + + return arm_compute::NEFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_WeightsTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) + { + InitialiseArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + } + + if (m_BiasesTensor) + { + if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) + { + InitialiseArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void NeonFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void NeonFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..9ffac96a86 --- /dev/null +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor> +{ +public: + NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr<arm_compute::Tensor> m_WeightsTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..dee789af85 --- /dev/null +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonL2NormalizationFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) + , m_Layer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void NeonL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..c1221fb98c --- /dev/null +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class NeonL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> +{ +public: + NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + // Purposely not a NEL2Normalize function. See constructor. + mutable arm_compute::NENormalizationLayer m_Layer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp new file mode 100644 index 0000000000..8b2b58d9b1 --- /dev/null +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonLstmFloatWorkload.hpp" + +namespace armnn +{ +NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<LstmQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonLstmFloatWorkload", 1, 1); +} + +void NeonLstmFloatWorkload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Neon backend!"); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp new file mode 100644 index 0000000000..4a5394f0a0 --- /dev/null +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> +{ +public: + NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp b/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp new file mode 100644 index 0000000000..79039aa51a --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerFloatWorkload.hpp" + +namespace armnn +{ + +void NeonMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloatWorkload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp b/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp new file mode 100644 index 0000000000..e7088b8c2f --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerFloatWorkload : public NeonBaseMergerWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerUint8Workload.cpp b/src/backends/neon/workloads/NeonMergerUint8Workload.cpp new file mode 100644 index 0000000000..3989702bd3 --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerUint8Workload.hpp" + +namespace armnn +{ + +void NeonMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerUint8Workload.hpp b/src/backends/neon/workloads/NeonMergerUint8Workload.hpp new file mode 100644 index 0000000000..73c0fd55ad --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..c4241ece19 --- /dev/null +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMultiplicationFloatWorkload.hpp" + + +namespace armnn +{ + +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonMultiplicationFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + m_PixelWiseMultiplication.configure(&input1, + &input2, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +void NeonMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute"); + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn + + diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..0a99c8cedc --- /dev/null +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor> +{ +public: + NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..472c75f222 --- /dev/null +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonNormalizationFloatWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) + , m_NormalizationLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); + std::string reasonIfUnsupported; + if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + // Input and output tensors have to have the same dimensionality. + if (info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] + || info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] + || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3] + || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2]) + { + throw InvalidArgumentException("Normalization requires input and output tensors to have equal dimensionality."); + } + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType); + arm_compute::NormalizationLayerInfo normalizationInfo(normType, + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K, + false); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +} + +void NeonNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..c6f64c6c15 --- /dev/null +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class NeonNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> +{ +public: + NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NENormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.cpp b/src/backends/neon/workloads/NeonPermuteWorkload.cpp new file mode 100644 index 0000000000..0bf4aa1319 --- /dev/null +++ b/src/backends/neon/workloads/NeonPermuteWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPermuteWorkload.hpp" +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/core/Error.h> + +namespace armnn +{ + +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + const armnn::PermutationVector& mappings = descriptor.m_DimMappings; + + return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo, + armcomputetensorutils::BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType... DataTypes> +NeonPermuteWorkload<DataTypes...>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer. + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType... DataTypes> +void NeonPermuteWorkload<DataTypes...>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class NeonPermuteWorkload<DataType::Float16, DataType::Float32>; +template class NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.hpp b/src/backends/neon/workloads/NeonPermuteWorkload.hpp new file mode 100644 index 0000000000..a85816be38 --- /dev/null +++ b/src/backends/neon/workloads/NeonPermuteWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <armnn/TypesUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEPermute.h> + +#include <string> + +namespace armnn +{ +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const PermuteDescriptor& descriptor); + +template <armnn::DataType... DataTypes> +class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...> +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("NeonPermuteWorkload"); + return name; + } + + NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data; + mutable arm_compute::NEPermute m_PermuteFunction; +}; + +using NeonPermuteFloatWorkload = NeonPermuteWorkload<DataType::Float16, DataType::Float32>; +using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..109e856506 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dBaseWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template <armnn::DataType... dataTypes> +NeonPooling2dBaseWorkload<dataTypes...>::NeonPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>; +template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..8ea41fe18a --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template <armnn::DataType... dataTypes> +class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...> +{ +public: + using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data; + + NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::NEPoolingLayer m_PoolingLayer; +}; + + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..46996b088c --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonPooling2dFloatWorkload::NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>(descriptor, info, + "NeonPooling2dFloatWorkload") +{ +} + +void NeonPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..9b0eebdc2b --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dFloatWorkload : public NeonPooling2dBaseWorkload<armnn::DataType::Float16, + armnn::DataType::Float32> +{ +public: + NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp b/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..8f99a2be86 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dUint8Workload.hpp" + + + +namespace armnn +{ + +NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload") +{ +} + +void NeonPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp b/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..d475c5f721 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/Types.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8> +{ +public: + NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp b/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..2dae9466bb --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonReshapeFloatWorkload::NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp b/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..bdef862419 --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp b/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp new file mode 100644 index 0000000000..41aa07fe49 --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeUint8Workload.hpp" + + + + +namespace armnn +{ +NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute"); + m_Layer.run(); +} +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp b/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp new file mode 100644 index 0000000000..4951873f0b --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..0e11d8249f --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxBaseWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..446392cd03 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor); + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..92e5139c1a --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxFloatWorkload.hpp" + +namespace armnn +{ + +NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1); + + // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions. + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void NeonSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..83f29222eb --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +class NeonSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..cff869c9b7 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxUint8Workload.hpp" + +namespace armnn +{ + +NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void NeonSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..0d72514ec0 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +namespace armnn +{ + +class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp b/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp new file mode 100644 index 0000000000..39ed5b7cbc --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterFloatWorkload.hpp" + +namespace armnn +{ + +void NeonSplitterFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloatWorkload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp b/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp new file mode 100644 index 0000000000..744a4fe216 --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterFloatWorkload : public NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp b/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp new file mode 100644 index 0000000000..4b2cf8fc91 --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterUint8Workload.hpp" + +namespace armnn +{ + +void NeonSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp b/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp new file mode 100644 index 0000000000..f219cfaa7d --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp new file mode 100644 index 0000000000..2acb829e3d --- /dev/null +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSubtractionFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticSubtraction::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + +NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<SubtractionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonSubtractionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonSubtractionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute"); + m_SubLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp new file mode 100644 index 0000000000..98aeb4cfc5 --- /dev/null +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonSubtractionFloatWorkload : public FloatWorkload<SubtractionQueueDescriptor> +{ +public: + NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticSubtraction m_SubLayer; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.cpp b/src/backends/neon/workloads/NeonWorkloadUtils.cpp new file mode 100644 index 0000000000..195f090171 --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloadUtils.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NeonWorkloadUtils.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> + +#include <armnn/Utils.hpp> +#include <armnn/Exceptions.hpp> + +#include <cstring> +#include <boost/assert.hpp> +#include <boost/cast.hpp> +#include <boost/format.hpp> + +#include "Profiling.hpp" + +#include <armnn/Types.hpp> +#include <Half.hpp> + +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +// Allocates a tensor and copy the contents in data to the tensor contents. +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data) +{ + InitialiseArmComputeTensorEmpty(tensor); + CopyArmComputeITensorData(data, tensor); +} + +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const Half* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const float* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const uint8_t* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const int32_t* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, + const ConstCpuTensorHandle* handle) +{ + BOOST_ASSERT(handle); + switch(handle->GetTensorInfo().GetDataType()) + { + case DataType::Float16: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<Half>()); + break; + case DataType::Float32: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<float>()); + break; + default: + BOOST_ASSERT_MSG(false, "Unexpected floating point type."); + } +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.hpp b/src/backends/neon/workloads/NeonWorkloadUtils.hpp new file mode 100644 index 0000000000..22668f6f4b --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloadUtils.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/Workload.hpp> +#include <backends/neon/NeonTensorHandle.hpp> + +#include "NeonTimer.hpp" + +#include <arm_compute/core/Types.h> +#include <arm_compute/core/Helpers.h> +#include <arm_compute/runtime/NEON/NEFunctions.h> +#include <arm_compute/runtime/SubTensor.h> + +#include <boost/cast.hpp> + +namespace armnn +{ +class Layer; + +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, const ConstCpuTensorHandle* handle); +} //namespace armnn + + +#define ARMNN_SCOPED_PROFILING_EVENT_NEON(name) \ + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, \ + name, \ + armnn::NeonTimer(), \ + armnn::WallClockTimer()) diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp new file mode 100644 index 0000000000..a4ab6b2cac --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "NeonActivationFloatWorkload.hpp" +#include "NeonActivationUint8Workload.hpp" +#include "NeonAdditionFloatWorkload.hpp" +#include "NeonBaseConstantWorkload.hpp" +#include "NeonBaseMergerWorkload.hpp" +#include "NeonBaseSplitterWorkload.hpp" +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include "NeonConstantFloatWorkload.hpp" +#include "NeonConstantUint8Workload.hpp" +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "NeonConvertFp32ToFp16Workload.hpp" +#include "NeonConvolution2dBaseWorkload.hpp" +#include "NeonConvolution2dFloatWorkload.hpp" +#include "NeonConvolution2dUint8Workload.hpp" +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "NeonFloorFloatWorkload.hpp" +#include "NeonFullyConnectedWorkload.hpp" +#include "NeonL2NormalizationFloatWorkload.hpp" +#include "NeonLstmFloatWorkload.hpp" +#include "NeonMergerFloatWorkload.hpp" +#include "NeonMergerUint8Workload.hpp" +#include "NeonMultiplicationFloatWorkload.hpp" +#include "NeonNormalizationFloatWorkload.hpp" +#include "NeonPermuteWorkload.hpp" +#include "NeonPooling2dBaseWorkload.hpp" +#include "NeonPooling2dFloatWorkload.hpp" +#include "NeonPooling2dUint8Workload.hpp" +#include "NeonReshapeFloatWorkload.hpp" +#include "NeonReshapeUint8Workload.hpp" +#include "NeonSoftmaxFloatWorkload.hpp" +#include "NeonSoftmaxUint8Workload.hpp" +#include "NeonSplitterFloatWorkload.hpp" +#include "NeonSplitterUint8Workload.hpp" +#include "NeonSubtractionFloatWorkload.hpp" |