From 10b4dfd8e9ccd7a03df7bb053ee1c644cb37f8ab Mon Sep 17 00:00:00 2001
From: David Beck
Date: Wed, 19 Sep 2018 12:03:20 +0100
Subject: IVGCVSW-1897 : build infrastructure for the src/backends folder

Change-Id: I7ebafb675ccc77ad54d1deb01412a8379a5356bb
---
 src/backends/ClWorkloads/CMakeLists.txt | 89 +++++
 .../ClWorkloads/ClActivationFloatWorkload.cpp | 56 +++
 .../ClWorkloads/ClActivationFloatWorkload.hpp | 29 ++
 .../ClWorkloads/ClActivationUint8Workload.cpp | 44 +++
 .../ClWorkloads/ClActivationUint8Workload.hpp | 29 ++
 src/backends/ClWorkloads/ClAdditionWorkload.cpp | 66 ++++
 src/backends/ClWorkloads/ClAdditionWorkload.hpp | 31 ++
 .../ClWorkloads/ClBaseConstantWorkload.cpp | 64 ++++
 .../ClWorkloads/ClBaseConstantWorkload.hpp | 30 ++
 src/backends/ClWorkloads/ClBaseMergerWorkload.hpp | 28 ++
 .../ClWorkloads/ClBaseSplitterWorkload.hpp | 28 ++
 .../ClBatchNormalizationFloatWorkload.cpp | 96 +++++
 .../ClBatchNormalizationFloatWorkload.hpp | 46 +++
 .../ClWorkloads/ClConstantFloatWorkload.cpp | 18 +
 .../ClWorkloads/ClConstantFloatWorkload.hpp | 20 +
 .../ClWorkloads/ClConstantUint8Workload.cpp | 18 +
 .../ClWorkloads/ClConstantUint8Workload.hpp | 20 +
 .../ClWorkloads/ClConvertFp16ToFp32Workload.cpp | 66 ++++
 .../ClWorkloads/ClConvertFp16ToFp32Workload.hpp | 30 ++
 .../ClWorkloads/ClConvertFp32ToFp16Workload.cpp | 66 ++++
 .../ClWorkloads/ClConvertFp32ToFp16Workload.hpp | 30 ++
 .../ClWorkloads/ClConvolution2dBaseWorkload.cpp | 48 +++
 .../ClWorkloads/ClConvolution2dBaseWorkload.hpp | 24 ++
 .../ClWorkloads/ClConvolution2dFloatWorkload.cpp | 81 ++++
 .../ClWorkloads/ClConvolution2dFloatWorkload.hpp | 35 ++
 .../ClWorkloads/ClConvolution2dUint8Workload.cpp | 81 ++++
 .../ClWorkloads/ClConvolution2dUint8Workload.hpp | 35 ++
 .../ClDepthwiseConvolutionBaseWorkload.cpp | 125 +++++++
 .../ClDepthwiseConvolutionBaseWorkload.hpp | 40 ++
 .../ClDepthwiseConvolutionFloatWorkload.cpp | 39 ++
 .../ClDepthwiseConvolutionFloatWorkload.hpp | 26 ++
 .../ClDepthwiseConvolutionUint8Workload.cpp | 40 ++
 .../ClDepthwiseConvolutionUint8Workload.hpp | 23 ++
 .../ClWorkloads/ClDivisionFloatWorkload.cpp | 48 +++
 .../ClWorkloads/ClDivisionFloatWorkload.hpp | 32 ++
 src/backends/ClWorkloads/ClFloorFloatWorkload.cpp | 31 ++
 src/backends/ClWorkloads/ClFloorFloatWorkload.hpp | 30 ++
 .../ClWorkloads/ClFullyConnectedWorkload.cpp | 111 ++++++
 .../ClWorkloads/ClFullyConnectedWorkload.hpp | 43 +++
 .../ClWorkloads/ClL2NormalizationFloatWorkload.cpp | 49 +++
 .../ClWorkloads/ClL2NormalizationFloatWorkload.hpp | 34 ++
 src/backends/ClWorkloads/ClLstmFloatWorkload.cpp | 408 +++++++++++++++++++++
 src/backends/ClWorkloads/ClLstmFloatWorkload.hpp | 68 ++++
 src/backends/ClWorkloads/ClMergerFloatWorkload.cpp | 20 +
 src/backends/ClWorkloads/ClMergerFloatWorkload.hpp | 22 ++
 src/backends/ClWorkloads/ClMergerUint8Workload.cpp | 19 +
 src/backends/ClWorkloads/ClMergerUint8Workload.hpp | 21 ++
 .../ClWorkloads/ClMultiplicationFloatWorkload.cpp | 60 +++
 .../ClWorkloads/ClMultiplicationFloatWorkload.hpp | 34 ++
 .../ClWorkloads/ClNormalizationFloatWorkload.cpp | 50 +++
 .../ClWorkloads/ClNormalizationFloatWorkload.hpp | 29 ++
 src/backends/ClWorkloads/ClPermuteWorkload.cpp | 56 +++
 src/backends/ClWorkloads/ClPermuteWorkload.hpp | 42 +++
 .../ClWorkloads/ClPooling2dBaseWorkload.cpp | 47 +++
 .../ClWorkloads/ClPooling2dBaseWorkload.hpp | 33 ++
 .../ClWorkloads/ClPooling2dFloatWorkload.cpp | 26 ++
 .../ClWorkloads/ClPooling2dFloatWorkload.hpp | 22 ++
 .../ClWorkloads/ClPooling2dUint8Workload.cpp | 27 ++
 .../ClWorkloads/ClPooling2dUint8Workload.hpp | 25 ++
 .../ClWorkloads/ClReshapeFloatWorkload.cpp | 33 ++
 .../ClWorkloads/ClReshapeFloatWorkload.hpp | 28 ++
 .../ClWorkloads/ClReshapeUint8Workload.cpp | 31 ++
 .../ClWorkloads/ClReshapeUint8Workload.hpp | 29 ++
 .../ClWorkloads/ClResizeBilinearFloatWorkload.cpp | 38 ++
 .../ClWorkloads/ClResizeBilinearFloatWorkload.hpp | 25 ++
 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp | 30 ++
 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp | 17 +
 .../ClWorkloads/ClSoftmaxFloatWorkload.cpp | 33 ++
 .../ClWorkloads/ClSoftmaxFloatWorkload.hpp | 30 ++
 .../ClWorkloads/ClSoftmaxUint8Workload.cpp | 43 +++
 .../ClWorkloads/ClSoftmaxUint8Workload.hpp | 31 ++
 .../ClWorkloads/ClSplitterFloatWorkload.cpp | 19 +
 .../ClWorkloads/ClSplitterFloatWorkload.hpp | 20 +
 .../ClWorkloads/ClSplitterUint8Workload.cpp | 19 +
 .../ClWorkloads/ClSplitterUint8Workload.hpp | 21 ++
 src/backends/ClWorkloads/ClSubtractionWorkload.cpp | 66 ++++
 src/backends/ClWorkloads/ClSubtractionWorkload.hpp | 31 ++
 src/backends/ClWorkloads/ClWorkloadUtils.hpp | 62 ++++
 src/backends/ClWorkloads/backend.cmake | 9 +
 79 files changed, 3503 insertions(+)
 create mode 100644 src/backends/ClWorkloads/CMakeLists.txt
 create mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClActivationFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClActivationUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClAdditionWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClBaseConstantWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClBaseConstantWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClBaseMergerWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConstantFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConstantUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClFloorFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClLstmFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClMergerFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClMergerFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClMergerUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClPermuteWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClReshapeUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSplitterUint8Workload.hpp
 create mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.cpp
 create mode 100644 src/backends/ClWorkloads/ClSubtractionWorkload.hpp
 create mode 100644 src/backends/ClWorkloads/ClWorkloadUtils.hpp
 create mode 100644 src/backends/ClWorkloads/backend.cmake

diff --git a/src/backends/ClWorkloads/CMakeLists.txt b/src/backends/ClWorkloads/CMakeLists.txt
new file mode 100644
index 0000000000..ac935b5cf7
--- /dev/null
+++ b/src/backends/ClWorkloads/CMakeLists.txt
@@ -0,0 +1,89 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+list(APPEND armnnClBackend_sources
+    ClActivationFloatWorkload.cpp
+    ClActivationFloatWorkload.hpp
+    ClActivationUint8Workload.cpp
+    ClActivationUint8Workload.hpp
+    ClAdditionWorkload.cpp
+    ClAdditionWorkload.hpp
+    ClBaseConstantWorkload.cpp
+    ClBaseConstantWorkload.hpp
+    ClBaseMergerWorkload.hpp
+    ClBaseSplitterWorkload.hpp
+    ClBatchNormalizationFloatWorkload.cpp
+    ClBatchNormalizationFloatWorkload.hpp
+    ClConstantFloatWorkload.cpp
+    ClConstantFloatWorkload.hpp
+    ClConstantUint8Workload.cpp
+    ClConstantUint8Workload.hpp
+    ClConvertFp16ToFp32Workload.cpp
+    ClConvertFp16ToFp32Workload.hpp
+    ClConvertFp32ToFp16Workload.cpp
+    ClConvertFp32ToFp16Workload.hpp
+    ClConvolution2dBaseWorkload.cpp
+    ClConvolution2dBaseWorkload.hpp
+    ClConvolution2dFloatWorkload.cpp
+    ClConvolution2dFloatWorkload.hpp
+    ClConvolution2dUint8Workload.cpp
+    ClConvolution2dUint8Workload.hpp
+    ClDepthwiseConvolutionBaseWorkload.cpp
+    ClDepthwiseConvolutionBaseWorkload.hpp
+    ClDepthwiseConvolutionFloatWorkload.cpp
+    ClDepthwiseConvolutionFloatWorkload.hpp
+    ClDepthwiseConvolutionUint8Workload.cpp
+    ClDepthwiseConvolutionUint8Workload.hpp
+    ClDivisionFloatWorkload.cpp
+    ClDivisionFloatWorkload.hpp
+    ClFloorFloatWorkload.cpp
+    ClFloorFloatWorkload.hpp
+    ClFullyConnectedWorkload.cpp
+    ClFullyConnectedWorkload.hpp
+    ClL2NormalizationFloatWorkload.cpp
+    ClL2NormalizationFloatWorkload.hpp
+    ClLstmFloatWorkload.cpp
+    ClLstmFloatWorkload.hpp
+    ClMergerFloatWorkload.cpp
+    ClMergerFloatWorkload.hpp
+    ClMergerUint8Workload.cpp
+    ClMergerUint8Workload.hpp
+    ClMultiplicationFloatWorkload.cpp
+    ClMultiplicationFloatWorkload.hpp
+    ClNormalizationFloatWorkload.cpp
+    ClNormalizationFloatWorkload.hpp
+    ClPermuteWorkload.cpp
+    ClPermuteWorkload.hpp
+    ClPooling2dBaseWorkload.cpp
+    ClPooling2dBaseWorkload.hpp
+    ClPooling2dFloatWorkload.cpp
+    ClPooling2dFloatWorkload.hpp
+    ClPooling2dUint8Workload.cpp
+    ClPooling2dUint8Workload.hpp
+    ClReshapeFloatWorkload.cpp
+    ClReshapeFloatWorkload.hpp
+    ClReshapeUint8Workload.cpp
+    ClReshapeUint8Workload.hpp
+    ClResizeBilinearFloatWorkload.cpp
+    ClResizeBilinearFloatWorkload.hpp
+    ClSoftmaxBaseWorkload.cpp
+    ClSoftmaxBaseWorkload.hpp
+    ClSoftmaxFloatWorkload.cpp
+    ClSoftmaxFloatWorkload.hpp
+    ClSoftmaxUint8Workload.cpp
+    ClSoftmaxUint8Workload.hpp
+    ClSplitterFloatWorkload.cpp
+    ClSplitterFloatWorkload.hpp
+    ClSplitterUint8Workload.cpp
+    ClSplitterUint8Workload.hpp
+    ClSubtractionWorkload.cpp
+    ClSubtractionWorkload.hpp
+    ClWorkloadUtils.hpp
+)
+
+add_library(armnnClBackend STATIC ${armnnClBackend_sources})
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src)
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn)
+target_include_directories(armnnClBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils)
diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp
new file mode 100644
index 0000000000..97078bddd8
--- /dev/null
+++ b/src/backends/ClWorkloads/ClActivationFloatWorkload.cpp
@@ -0,0 +1,56 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "CL: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::CLActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +ClActivationFloatWorkload::ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void ClActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp b/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp new file mode 100644 index 0000000000..e1b6fe13d8 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ +arm_compute::Status ClActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +// Activation layer execution. +class ClActivationFloatWorkload : public FloatWorkload +{ +public: + ClActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.cpp b/src/backends/ClWorkloads/ClActivationUint8Workload.cpp new file mode 100644 index 0000000000..f39c856aa9 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationUint8Workload.cpp @@ -0,0 +1,44 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClActivationUint8Workload.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "backends/ArmComputeUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClActivationUint8Workload::ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("ClActivationUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void ClActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} + +} //namespace Armnn + + diff --git a/src/backends/ClWorkloads/ClActivationUint8Workload.hpp b/src/backends/ClWorkloads/ClActivationUint8Workload.hpp new file mode 100644 index 0000000000..bb2ff58853 --- /dev/null +++ b/src/backends/ClWorkloads/ClActivationUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Activation layer execution. +class ClActivationUint8Workload : public Uint8Workload +{ +public: + ClActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.cpp b/src/backends/ClWorkloads/ClAdditionWorkload.cpp new file mode 100644 index 0000000000..dd439d59a9 --- /dev/null +++ b/src/backends/ClWorkloads/ClAdditionWorkload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClAdditionWorkload.hpp" + +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +template +ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClAdditionWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); +} + +template +void ClAdditionWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute"); + m_Layer.run(); +} + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, + &aclInput1Info, + &aclOutputInfo, + g_AclConvertPolicy); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return supported; +} + +} //namespace armnn + +template class armnn::ClAdditionWorkload; +template class armnn::ClAdditionWorkload; diff --git a/src/backends/ClWorkloads/ClAdditionWorkload.hpp b/src/backends/ClWorkloads/ClAdditionWorkload.hpp new file mode 100644 index 0000000000..b4706890d1 --- /dev/null +++ b/src/backends/ClWorkloads/ClAdditionWorkload.hpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +template +class ClAdditionWorkload : public TypedWorkload +{ +public: + ClAdditionWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticAddition m_Layer; +}; + +bool ClAdditionValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp new file mode 100644 index 0000000000..021d17512f --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseConstantWorkload.cpp @@ -0,0 +1,64 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClBaseConstantWorkload.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "Half.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +template class ClBaseConstantWorkload; +template class ClBaseConstantWorkload; + +template +void ClBaseConstantWorkload::Execute() const +{ + // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data + // on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer may not + // have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::CLTensor& output = static_cast(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = static_cast(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeClTensorData(data.m_LayerOutput->GetConstTensor(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp b/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp new file mode 100644 index 0000000000..ca1db389dc --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseConstantWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ +template +class ClBaseConstantWorkload : public TypedWorkload +{ +public: + ClBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload(descriptor, info) + , m_RanOnce(false) + { + } + + void Execute() const override; + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp b/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp new file mode 100644 index 0000000000..420e074217 --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseMergerWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Merger layer common to all data types. +template +class ClBaseMergerWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp b/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..41f382cac8 --- /dev/null +++ b/src/backends/ClWorkloads/ClBaseSplitterWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template +class ClBaseSplitterWorkload : public TypedWorkload +{ +public: + using TypedWorkload::TypedWorkload; + + void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..021734aaa6 --- /dev/null +++ b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClBatchNormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor &desc) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + desc.m_Eps); +} + +ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Mean = std::make_unique(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Data.ValidateInputsOutputs("ClBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeClTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeClTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeClTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + InitializeArmComputeClTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + +void 
ClBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void ClBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..22c71b1073 --- /dev/null +++ b/src/backends/ClWorkloads/ClBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& desc); + +class ClBatchNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLBatchNormalizationLayer m_Layer; + + std::unique_ptr m_Mean; + std::unique_ptr m_Variance; + std::unique_ptr m_Gamma; + std::unique_ptr m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp new file mode 100644 index 0000000000..1565047c22 --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantFloatWorkload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConstantFloatWorkload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantFloatWorkload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp b/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp new file mode 100644 index 0000000000..0cbeaad9ea --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ +class ClConstantFloatWorkload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.cpp b/src/backends/ClWorkloads/ClConstantUint8Workload.cpp new file mode 100644 index 0000000000..a5ef0321cd --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantUint8Workload.cpp @@ -0,0 +1,18 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClConstantUint8Workload.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantUint8Workload_Execute"); + ClBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConstantUint8Workload.hpp b/src/backends/ClWorkloads/ClConstantUint8Workload.hpp new file mode 100644 index 0000000000..30556dc0d6 --- /dev/null +++ b/src/backends/ClWorkloads/ClConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseConstantWorkload.hpp" + +namespace armnn +{ + +class ClConstantUint8Workload : public ClBaseConstantWorkload +{ +public: + using ClBaseConstantWorkload::ClBaseConstantWorkload; + void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..534249aeac --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvertFp16ToFp32Workload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( + const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) : + Float16ToFloat32Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClConvertFp16ToFp32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); +} + +void ClConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute"); + m_Layer.run(); +} + +arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + if (input.GetDataType() != DataType::Float16) + { + *reasonIfUnsupported = "Input should be Float16"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + if (output.GetDataType() != DataType::Float32) + { + *reasonIfUnsupported = "Output should be Float32"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( + &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return aclStatus; +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..c72d2262b3 --- /dev/null +++ 
b/src/backends/ClWorkloads/ClConvertFp16ToFp32Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClConvertFp16ToFp32Workload : public Float16ToFloat32Workload +{ +public: + + ClConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CLDepthConvertLayer m_Layer; +}; + +arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..73b3cbc542 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.cpp @@ -0,0 +1,66 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvertFp32ToFp16Workload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE; + +ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( + const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) : + Float32ToFloat16Workload(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("ClConvertFp32ToFp16Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output, g_AclConvertPolicy, 0); +} + +void ClConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute"); + m_Layer.run(); +} + +arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + if (input.GetDataType() != DataType::Float32) + { + *reasonIfUnsupported = "Input should be Float32"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + if (output.GetDataType() != DataType::Float16) + { + *reasonIfUnsupported = "Output should be Float16"; + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, *reasonIfUnsupported); + } + + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + const arm_compute::Status aclStatus = arm_compute::CLDepthConvertLayer::validate( + &aclInputInfo, &aclOutputInfo, g_AclConvertPolicy, 0); + + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + + return aclStatus; +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..fb6af02070 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvertFp32ToFp16Workload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClConvertFp32ToFp16Workload : public Float32ToFloat16Workload +{ +public: + + ClConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::CLDepthConvertLayer m_Layer; +}; + +arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..228f17d54e --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dBaseWorkload.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +} diff --git a/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..a983dba79a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dBaseWorkload.hpp @@ -0,0 +1,24 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..029f41d5dc --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dFloatWorkload::ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + + // todo: check tensor shapes match. + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dFloat32Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dFloat32Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..28ba53f38a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dFloatWorkload.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dFloatWorkload : public FloatWorkload +{ +public: + ClConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..e6783b698a --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.cpp @@ -0,0 +1,81 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClConvolution2dUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClConvolution2dUint8Workload::ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : Uint8Workload(descriptor, info) + , m_ConvolutionLayer(memoryManager) +{ + // todo: check tensor shapes match + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClConvolution2dUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ConvolutionLayer.configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + + InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor()); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_ConvolutionLayer.prepare(); + FreeUnusedTensors(); +} + +void ClConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvolution2dUint8Workload_Execute"); + + m_ConvolutionLayer.run(); +} + +void ClConvolution2dUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..f1f008b1b9 --- /dev/null +++ b/src/backends/ClWorkloads/ClConvolution2dUint8Workload.hpp @@ -0,0 +1,35 @@ +// +// Copyright 
© 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +class ClConvolution2dUint8Workload : public Uint8Workload +{ +public: + ClConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager); + void Execute() const override; + +private: + mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..0e89a68118 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,125 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +#include "TypeUtils.hpp" + +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get()); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +template +ClDepthwiseConvolutionBaseWorkload::ClDepthwiseConvolutionBaseWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + auto& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo()); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + std::string name = std::string("ClDepthwiseConvolution") + + GetDataTypeName(m_Data.m_Weight->GetTensorInfo().GetDataType()) + "Workload"; + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + 
arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + const unsigned int depthMultiplier = weightInfo.GetShape()[0]; + + //Check for optimisation opportunities. + bool use3x3Optimisation = (weightInfo.GetShape()[3] == 3) && (weightInfo.GetShape()[2] == 3); + if (use3x3Optimisation) + { + m_DepthwiseConvolutionLayer = std::make_unique(); + static_cast(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + else + { + m_DepthwiseConvolutionLayer = std::make_unique(); + static_cast(m_DepthwiseConvolutionLayer.get())->configure( + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier); + } + + BOOST_ASSERT(m_DepthwiseConvolutionLayer); +} + +template +void ClDepthwiseConvolutionBaseWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generate known implementations for linker +template class ClDepthwiseConvolutionBaseWorkload; +template class ClDepthwiseConvolutionBaseWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..49a8b5d357 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional& biases); + +template +class ClDepthwiseConvolutionBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + ClDepthwiseConvolutionBaseWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + +protected: + std::unique_ptr m_DepthwiseConvolutionLayer; + + std::unique_ptr m_KernelTensor; + std::unique_ptr m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..635ae1f327 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,39 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionFloatWorkload.hpp" + +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionFloatWorkload::ClDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitializeArmComputeClTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..4f9d5f332e --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionFloatWorkload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..af5836e908 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDepthwiseConvolutionUint8Workload.hpp" + +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClDepthwiseConvolutionUint8Workload::ClDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClDepthwiseConvolutionBaseWorkload(descriptor, info) +{ + InitialiseArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor()); + + if (m_BiasTensor) + { + InitialiseArmComputeClTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor()); + } + + m_DepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void ClDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_DepthwiseConvolutionLayer); + + m_DepthwiseConvolutionLayer->run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..b9f676de94 --- /dev/null +++ b/src/backends/ClWorkloads/ClDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,23 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClDepthwiseConvolutionBaseWorkload.hpp" + +namespace armnn +{ + +class ClDepthwiseConvolutionUint8Workload : public ClDepthwiseConvolutionBaseWorkload +{ +public: + ClDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp new file mode 100644 index 0000000000..2371789035 --- /dev/null +++ b/src/backends/ClWorkloads/ClDivisionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClDivisionFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); +} + + +ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClDivisionFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_ArithmeticDivision.configure(&input0, &input1, &output); +} + +void ClDivisionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionFloatWorkload_Execute"); + + // Executes the layer. + m_ArithmeticDivision.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp b/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp new file mode 100644 index 0000000000..d34e11dab8 --- /dev/null +++ b/src/backends/ClWorkloads/ClDivisionFloatWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClDivisionFloatWorkload : public FloatWorkload +{ +public: + ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const + WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp new file mode 100644 index 0000000000..d090a7da81 --- /dev/null +++ b/src/backends/ClWorkloads/ClFloorFloatWorkload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
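// Sketch, not from the patch: the workload base classes are templated on their queue
// descriptor, so the stripped declaration of the division workload above most plausibly
// read as follows (FloatWorkload<DivisionQueueDescriptor> is an assumption consistent
// with the constructor's ": FloatWorkload(descriptor, info)" initialiser):
class ClDivisionFloatWorkload : public FloatWorkload<DivisionQueueDescriptor>
{
public:
    ClDivisionFloatWorkload(const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info);
    void Execute() const override;

private:
    // CLArithmeticDivision::run() is non-const while Execute() is const, hence mutable.
    mutable arm_compute::CLArithmeticDivision m_ArithmeticDivision;
};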
+// SPDX-License-Identifier: MIT +// + +#include "ClFloorFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClFloorFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp b/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp new file mode 100644 index 0000000000..f269bcf30c --- /dev/null +++ b/src/backends/ClWorkloads/ClFloorFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClFloorFloatWorkload : public FloatWorkload +{ +public: + ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..8d2fd0e909 --- /dev/null +++ b/src/backends/ClWorkloads/ClFullyConnectedWorkload.cpp @@ -0,0 +1,111 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClFullyConnectedWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + return arm_compute::CLFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr& memoryManager) + : BaseWorkload(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_WeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = 
std::make_unique(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) + { + InitialiseArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor()); + } + else + { + InitializeArmComputeClTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + } + + if (m_BiasesTensor) + { + if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) + { + InitialiseArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor()); + } + else + { + InitializeArmComputeClTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void ClFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void ClFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp b/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..a61610992e --- /dev/null +++ b/src/backends/ClWorkloads/ClFullyConnectedWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class ClFullyConnectedWorkload : public armnn::BaseWorkload +{ +public: + ClFullyConnectedWorkload(const armnn::FullyConnectedQueueDescriptor& descriptor, + const armnn::WorkloadInfo& info, + std::shared_ptr& memoryManager); + + using armnn::BaseWorkload::m_Data; + void Execute() const override; + +private: + mutable arm_compute::CLFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr m_WeightsTensor; + std::unique_ptr m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..4ccaae3430 --- /dev/null +++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
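// Sketch, not from the patch: the fully connected constructor above branches on the
// weight and bias data types before uploading them; the stripped GetConstTensor<>
// arguments are presumably uint8_t and int32_t, and the stripped memory-manager type is
// presumably std::shared_ptr<arm_compute::MemoryManagerOnDemand>, matching the softmax
// workloads later in this patch.
if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8)
{
    InitialiseArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>());
}
else
{
    InitializeArmComputeClTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight);
}

if (m_BiasesTensor && m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32)
{
    InitialiseArmComputeClTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor<int32_t>());
}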
+// SPDX-License-Identifier: MIT +// + +#include "ClL2NormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::CLNormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void ClL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f3f7de110a --- /dev/null +++ b/src/backends/ClWorkloads/ClL2NormalizationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class ClL2NormalizationFloatWorkload : public FloatWorkload +{ +public: + ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + // Purposely not a CLL2Normalize function. See constructor. + mutable arm_compute::CLNormalizationLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp new file mode 100644 index 0000000000..09a34c2d02 --- /dev/null +++ b/src/backends/ClWorkloads/ClLstmFloatWorkload.cpp @@ -0,0 +1,408 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
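// Hedged usage sketch, caller not shown in this patch: the *WorkloadValidate helpers
// (such as ClL2NormalizationWorkloadValidate above) are intended to be queried before a
// workload is built, typically from the CL layer-support code included as
// "backends/ClLayerSupport.hpp". inputInfo and outputInfo are placeholder TensorInfo objects.
arm_compute::Status status = ClL2NormalizationWorkloadValidate(inputInfo, outputInfo);
bool supported = (status.error_code() == arm_compute::ErrorCode::OK);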
+// SPDX-License-Identifier: MIT +// + +#include "ClLstmFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "backends/ClLayerSupport.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) + : FloatWorkload(descriptor, info) +{ + arm_compute::LSTMParams lstm_param; + + // Basic parameters + m_InputToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights->GetTensorInfo()); + + m_InputToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToCellWeightsTensor, m_Data.m_InputToCellWeights->GetTensorInfo()); + + m_InputToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToOutputWeightsTensor, m_Data.m_InputToOutputWeights->GetTensorInfo()); + + m_RecurrentToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToForgetWeightsTensor, m_Data.m_RecurrentToForgetWeights->GetTensorInfo()); + + m_RecurrentToCellWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToCellWeightsTensor, m_Data.m_RecurrentToCellWeights->GetTensorInfo()); + + m_RecurrentToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToOutputWeightsTensor, m_Data.m_RecurrentToOutputWeights->GetTensorInfo()); + + m_ForgetGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_ForgetGateBiasTensor, m_Data.m_ForgetGateBias->GetTensorInfo()); + + m_CellBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellBiasTensor, m_Data.m_CellBias->GetTensorInfo()); + + m_OutputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_OutputGateBiasTensor, m_Data.m_OutputGateBias->GetTensorInfo()); + + // for future reference: check the AndroidNN API for the logic here + if (!m_Data.m_Parameters.m_CifgEnabled) + { + m_InputToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights->GetTensorInfo()); + + m_RecurrentToInputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_RecurrentToInputWeightsTensor, m_Data.m_RecurrentToInputWeights->GetTensorInfo()); + + m_CellToInputWeightsTensor = std::make_unique(); + if (m_Data.m_CellToInputWeights != nullptr) + { + BuildArmComputeTensor(*m_CellToInputWeightsTensor, m_Data.m_CellToInputWeights->GetTensorInfo()); + } + + m_InputGateBiasTensor = std::make_unique(); + BuildArmComputeTensor(*m_InputGateBiasTensor, m_Data.m_InputGateBias->GetTensorInfo()); + + lstm_param.set_cifg_params(m_InputToInputWeightsTensor.get(), + m_RecurrentToInputWeightsTensor.get(), + m_Data.m_CellToInputWeights != nullptr ? m_CellToInputWeightsTensor.get() : nullptr, + m_InputGateBiasTensor.get()); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + m_ProjectionWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_ProjectionWeightsTensor, m_Data.m_ProjectionWeights->GetTensorInfo()); + + m_ProjectionBiasTensor = std::make_unique(); + if (m_Data.m_ProjectionBias != nullptr) + { + BuildArmComputeTensor(*m_ProjectionBiasTensor, m_Data.m_ProjectionBias->GetTensorInfo()); + } + + lstm_param.set_projection_params(m_ProjectionWeightsTensor.get(), + m_Data.m_ProjectionBias != nullptr ? 
m_ProjectionBiasTensor.get() : nullptr); + } + + if (m_Data.m_Parameters.m_PeepholeEnabled) + { + m_CellToForgetWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellToForgetWeightsTensor, m_Data.m_CellToForgetWeights->GetTensorInfo()); + + m_CellToOutputWeightsTensor = std::make_unique(); + BuildArmComputeTensor(*m_CellToOutputWeightsTensor, m_Data.m_CellToOutputWeights->GetTensorInfo()); + + lstm_param.set_peephole_params(m_CellToForgetWeightsTensor.get(), m_CellToOutputWeightsTensor.get()); + } + + const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + const arm_compute::ICLTensor& output_state_in = static_cast(m_Data.m_Inputs[1])->GetTensor(); + const arm_compute::ICLTensor& cell_state_in = static_cast(m_Data.m_Inputs[2])->GetTensor(); + + arm_compute::ICLTensor& output_state_out = static_cast(m_Data.m_Outputs[1])->GetTensor(); + arm_compute::ICLTensor& cell_state_out = static_cast(m_Data.m_Outputs[2])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[3])->GetTensor(); + + // Get the batch_size and the num_units from the cellStateIn dimensions + const TensorInfo& inputTensorInfo = info.m_InputTensorInfos[2]; + const unsigned int batch_size = boost::numeric_cast(inputTensorInfo.GetShape()[0]); + const unsigned int num_units = boost::numeric_cast(inputTensorInfo.GetShape()[1]); + + m_ScratchBuffer = std::make_unique(); + if (m_Data.m_Parameters.m_CifgEnabled) + { + // 2D tensor with dimensions [num_units * 4, batch_size] with CIFG + armnn::TensorInfo scratchBuffer1({ batch_size, num_units * 4 }, DataType::Float32); + BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer1); + } + else + { + // scratch_buffer [num_units * 3, batch_size] without CIFG + armnn::TensorInfo scratchBuffer2({ batch_size, num_units * 3 }, DataType::Float32); + BuildArmComputeTensor(*m_ScratchBuffer, scratchBuffer2); + } + + float cell_threshold = m_Data.m_Parameters.m_ClippingThresCell; + float projection_threshold = m_Data.m_Parameters.m_ClippingThresProj; + + // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations + arm_compute::ActivationLayerInfo activationLayerInfo; + if (m_Data.m_Parameters.m_ActivationFunc == 0) + { + // no activation, do nothing + } + else if (m_Data.m_Parameters.m_ActivationFunc == 1) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 3) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 4) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); + } + else if (m_Data.m_Parameters.m_ActivationFunc == 6) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); + } + else + { + throw armnn::Exception("Wrong Type of Activation Function!"); + } + + + m_LstmLayer.configure(&input, m_InputToForgetWeightsTensor.get(), m_InputToCellWeightsTensor.get(), + m_InputToOutputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(), + m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(), + m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), + &output_state_in, &cell_state_in, m_ScratchBuffer.get(), &output_state_out, + 
&cell_state_out, &output, lstm_param, activationLayerInfo, + cell_threshold, projection_threshold); + + armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); + + InitialiseArmComputeClTensorData(*m_InputToForgetWeightsTensor, + m_Data.m_InputToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_InputToCellWeightsTensor, + m_Data.m_InputToCellWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_InputToOutputWeightsTensor, + m_Data.m_InputToOutputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToForgetWeightsTensor, + m_Data.m_RecurrentToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToCellWeightsTensor, + m_Data.m_RecurrentToCellWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToOutputWeightsTensor, + m_Data.m_RecurrentToOutputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_ForgetGateBiasTensor, + m_Data.m_ForgetGateBias->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_CellBiasTensor, + m_Data.m_CellBias->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_OutputGateBiasTensor, + m_Data.m_OutputGateBias->GetConstTensor()); + + if (!m_Data.m_Parameters.m_CifgEnabled) + { + InitialiseArmComputeClTensorData(*m_InputToInputWeightsTensor, + m_Data.m_InputToInputWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_RecurrentToInputWeightsTensor, + m_Data.m_RecurrentToInputWeights->GetConstTensor()); + if (m_Data.m_CellToInputWeights != nullptr) + { + InitialiseArmComputeClTensorData(*m_CellToInputWeightsTensor, + m_Data.m_CellToInputWeights->GetConstTensor()); + } + InitialiseArmComputeClTensorData(*m_InputGateBiasTensor, + m_Data.m_InputGateBias->GetConstTensor()); + } + + if (m_Data.m_Parameters.m_ProjectionEnabled) + { + InitialiseArmComputeClTensorData(*m_ProjectionWeightsTensor, + m_Data.m_ProjectionWeights->GetConstTensor()); + if (m_Data.m_ProjectionBias != nullptr) + { + InitialiseArmComputeClTensorData(*m_ProjectionBiasTensor, + m_Data.m_ProjectionBias->GetConstTensor()); + } + } + + if (m_Data.m_Parameters.m_PeepholeEnabled) + { + InitialiseArmComputeClTensorData(*m_CellToForgetWeightsTensor, + m_Data.m_CellToForgetWeights->GetConstTensor()); + InitialiseArmComputeClTensorData(*m_CellToOutputWeightsTensor, + m_Data.m_CellToOutputWeights->GetConstTensor()); + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_LstmLayer.prepare(); + FreeUnusedTensors(); +} + +void ClLstmFloatWorkload::Execute() const +{ + m_LstmLayer.run(); +} + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* 
projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights) +{ + arm_compute::LSTMParams lstm_params_info; + + // The inputs and the outputs + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputStateInInfo = BuildArmComputeTensorInfo(outputStateIn); + const arm_compute::TensorInfo aclCellStateInInfo = BuildArmComputeTensorInfo(cellStateIn); + const arm_compute::TensorInfo aclScratchBufferInfo = BuildArmComputeTensorInfo(scratchBuffer); + const arm_compute::TensorInfo aclOutputStateOutInfo = BuildArmComputeTensorInfo(outputStateOut); + const arm_compute::TensorInfo aclCellStateOutInfo = BuildArmComputeTensorInfo(cellStateOut); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + // Basic parameters + const arm_compute::TensorInfo aclInputToForgetWeightsInfo = BuildArmComputeTensorInfo(inputToForgetWeights); + const arm_compute::TensorInfo aclInputToCellWeightsInfo = BuildArmComputeTensorInfo(inputToCellWeights); + const arm_compute::TensorInfo aclInputToOutputWeightsInfo = BuildArmComputeTensorInfo(inputToOutputWeights); + const arm_compute::TensorInfo aclRecurrentToForgetWeightsInfo + = BuildArmComputeTensorInfo(recurrentToForgetWeights); + const arm_compute::TensorInfo aclRecurrentToCellWeightsInfo + = BuildArmComputeTensorInfo(recurrentToCellWeights); + const arm_compute::TensorInfo aclRecurrentToOutputWeightsInfo + = BuildArmComputeTensorInfo(recurrentToOutputWeights); + const arm_compute::TensorInfo aclForgetGateBiasInfo = BuildArmComputeTensorInfo(forgetGateBias); + const arm_compute::TensorInfo aclCellBiasInfo = BuildArmComputeTensorInfo(cellBias); + const arm_compute::TensorInfo aclOutputGateBiasInfo = BuildArmComputeTensorInfo(outputGateBias); + + arm_compute::TensorInfo aclInputToInputWeightsInfo; + arm_compute::TensorInfo aclRecurrentToInputWeightsInfo; + arm_compute::TensorInfo aclCellToInputWeightsInfo; + arm_compute::TensorInfo aclInputGateBiasInfo; + arm_compute::TensorInfo aclProjectionWeightsInfo; + arm_compute::TensorInfo aclProjectionBiasInfo; + arm_compute::TensorInfo aclCellToForgetWeightsInfo; + arm_compute::TensorInfo aclCellToOutputWeightsInfo; + + if (!descriptor.m_CifgEnabled) + { + armnn::TensorInfo inputToInputWInfo = *inputToInputWeights; + aclInputToInputWeightsInfo = BuildArmComputeTensorInfo(inputToInputWInfo); + armnn::TensorInfo recurrentToInputWInfo = *recurrentToInputWeights; + aclRecurrentToInputWeightsInfo = BuildArmComputeTensorInfo(recurrentToInputWInfo); + + if (cellToInputWeights != nullptr) + { + armnn::TensorInfo cellToInputWInfo = *cellToInputWeights; + aclCellToInputWeightsInfo = BuildArmComputeTensorInfo(cellToInputWInfo); + } + armnn::TensorInfo inputGateBiasInfo = *inputGateBias; + aclInputGateBiasInfo = BuildArmComputeTensorInfo(inputGateBiasInfo); + lstm_params_info.set_cifg_params(&aclInputToInputWeightsInfo, &aclRecurrentToInputWeightsInfo, + cellToInputWeights != nullptr ? &aclCellToInputWeightsInfo: nullptr, + &aclInputGateBiasInfo); + } + + if (descriptor.m_ProjectionEnabled) + { + const armnn::TensorInfo& projectionWInfo = *projectionWeights; + aclProjectionWeightsInfo = BuildArmComputeTensorInfo(projectionWInfo); + + if (projectionBias != nullptr) + { + const armnn::TensorInfo& projectionBiasInfo = *projectionBias; + aclProjectionBiasInfo = BuildArmComputeTensorInfo(projectionBiasInfo); + } + lstm_params_info.set_projection_params(&aclProjectionWeightsInfo, + projectionBias != nullptr ? 
&aclProjectionBiasInfo: nullptr); + } + + if (descriptor.m_PeepholeEnabled) + { + const armnn::TensorInfo& cellToForgetWInfo = *cellToForgetWeights; + aclCellToForgetWeightsInfo = BuildArmComputeTensorInfo(cellToForgetWInfo); + const armnn::TensorInfo& cellToOutputWInfo = *cellToOutputWeights; + aclCellToOutputWeightsInfo = BuildArmComputeTensorInfo(cellToOutputWInfo); + lstm_params_info.set_peephole_params(&aclCellToForgetWeightsInfo, &aclCellToOutputWeightsInfo); + } + + float cell_threshold = descriptor.m_ClippingThresCell; + float projection_threshold = descriptor.m_ClippingThresProj; + + // for preparing the object for the class ActivationLayerInfo, we need to consider 5 situations + arm_compute::ActivationLayerInfo activationLayerInfo; + if (descriptor.m_ActivationFunc == 0) + { + // no activation, do nothing + } + else if (descriptor.m_ActivationFunc == 1) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + } + else if (descriptor.m_ActivationFunc == 3) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.0); + } + else if (descriptor.m_ActivationFunc == 4) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::TANH, 1.0, 1.0); + } + else if (descriptor.m_ActivationFunc == 6) + { + activationLayerInfo = arm_compute::ActivationLayerInfo( + arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC); + } + else + { + throw armnn::Exception("Wrong Type of Activation Function!"); + } + + return arm_compute::CLLSTMLayer::validate(&aclInputInfo, &aclInputToForgetWeightsInfo, + &aclInputToCellWeightsInfo, + &aclInputToOutputWeightsInfo, + &aclRecurrentToForgetWeightsInfo, + &aclRecurrentToCellWeightsInfo, + &aclRecurrentToOutputWeightsInfo, + &aclForgetGateBiasInfo, + &aclCellBiasInfo, + &aclOutputGateBiasInfo, + &aclOutputStateInInfo, &aclCellStateInInfo, + &aclScratchBufferInfo, &aclOutputStateOutInfo, + &aclCellStateOutInfo, &aclOutputInfo, + lstm_params_info, activationLayerInfo, + cell_threshold, projection_threshold); +} + +void ClLstmFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_InputToInputWeightsTensor); + FreeTensorIfUnused(m_InputToForgetWeightsTensor); + FreeTensorIfUnused(m_InputToCellWeightsTensor); + FreeTensorIfUnused(m_InputToOutputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToInputWeightsTensor); + FreeTensorIfUnused(m_RecurrentToForgetWeightsTensor); + FreeTensorIfUnused(m_RecurrentToCellWeightsTensor); + FreeTensorIfUnused(m_RecurrentToOutputWeightsTensor); + FreeTensorIfUnused(m_CellToInputWeightsTensor); + FreeTensorIfUnused(m_CellToForgetWeightsTensor); + FreeTensorIfUnused(m_CellToOutputWeightsTensor); + FreeTensorIfUnused(m_InputGateBiasTensor); + FreeTensorIfUnused(m_ForgetGateBiasTensor); + FreeTensorIfUnused(m_CellBiasTensor); + FreeTensorIfUnused(m_OutputGateBiasTensor); + FreeTensorIfUnused(m_ProjectionWeightsTensor); + FreeTensorIfUnused(m_ProjectionBiasTensor); + FreeTensorIfUnused(m_ScratchBuffer); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp b/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp new file mode 100644 index 0000000000..61d8fc3e6c --- /dev/null +++ b/src/backends/ClWorkloads/ClLstmFloatWorkload.hpp @@ -0,0 +1,68 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
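// Sketch, not from the patch: the if/else chain above (and its duplicate in the
// constructor) maps the descriptor's m_ActivationFunc to an Arm Compute
// ActivationLayerInfo. The numeric encoding follows the Android NN / TensorFlow Lite
// LSTM activation parameter: 0 = none, 1 = ReLU, 3 = ReLU6, 4 = tanh, 6 = sigmoid.
// A hypothetical helper capturing the same logic:
arm_compute::ActivationLayerInfo ConvertLstmActivationFuncToAclLayerInfo(uint32_t activationFunc)
{
    using AclActivation = arm_compute::ActivationLayerInfo::ActivationFunction;
    switch (activationFunc)
    {
        case 0: return arm_compute::ActivationLayerInfo();                       // no activation
        case 1: return arm_compute::ActivationLayerInfo(AclActivation::RELU);
        case 3: return arm_compute::ActivationLayerInfo(AclActivation::BOUNDED_RELU, 6.0f);
        case 4: return arm_compute::ActivationLayerInfo(AclActivation::TANH, 1.0f, 1.0f);
        case 6: return arm_compute::ActivationLayerInfo(AclActivation::LOGISTIC);
        default: throw armnn::Exception("Wrong Type of Activation Function!");
    }
}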
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include + +namespace armnn +{ + +class ClLstmFloatWorkload : public FloatWorkload +{ +public: + ClLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLLSTMLayer m_LstmLayer; + + std::unique_ptr m_InputToInputWeightsTensor; + std::unique_ptr m_InputToForgetWeightsTensor; + std::unique_ptr m_InputToCellWeightsTensor; + std::unique_ptr m_InputToOutputWeightsTensor; + std::unique_ptr m_RecurrentToInputWeightsTensor; + std::unique_ptr m_RecurrentToForgetWeightsTensor; + std::unique_ptr m_RecurrentToCellWeightsTensor; + std::unique_ptr m_RecurrentToOutputWeightsTensor; + std::unique_ptr m_CellToInputWeightsTensor; + std::unique_ptr m_CellToForgetWeightsTensor; + std::unique_ptr m_CellToOutputWeightsTensor; + std::unique_ptr m_InputGateBiasTensor; + std::unique_ptr m_ForgetGateBiasTensor; + std::unique_ptr m_CellBiasTensor; + std::unique_ptr m_OutputGateBiasTensor; + std::unique_ptr m_ProjectionWeightsTensor; + std::unique_ptr m_ProjectionBiasTensor; + + std::unique_ptr m_ScratchBuffer; + + void FreeUnusedTensors(); +}; + +arm_compute::Status ClLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor &descriptor, + const TensorInfo& inputToForgetWeights, + const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, + const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, + const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, + const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, + const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, + const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, + const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights); +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp new file mode 100644 index 0000000000..151f1e0ee7 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerFloatWorkload.cpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerFloatWorkload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp b/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp new file mode 100644 index 0000000000..9782f7a8f3 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerFloatWorkload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.cpp b/src/backends/ClWorkloads/ClMergerUint8Workload.cpp new file mode 100644 index 0000000000..9d1060d857 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerUint8Workload.cpp @@ -0,0 +1,19 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMergerUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +void ClMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerUint8Workload_Execute"); + ClBaseMergerWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMergerUint8Workload.hpp b/src/backends/ClWorkloads/ClMergerUint8Workload.hpp new file mode 100644 index 0000000000..cbfc19a0f2 --- /dev/null +++ b/src/backends/ClWorkloads/ClMergerUint8Workload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseMergerWorkload.hpp" + +namespace armnn +{ + +class ClMergerUint8Workload : public ClBaseMergerWorkload +{ +public: + using ClBaseMergerWorkload::ClBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..c3330a98e8 --- /dev/null +++ b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClMultiplicationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. 
+ return arm_compute::CLPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + + +ClMultiplicationFloatWorkload::ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClMultiplicationFloatWorkload", 2, 1); + + arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + // Construct + m_PixelWiseMultiplication.configure(&input0, + &input1, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_NEAREST_EVEN); +} + +void ClMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationFloatWorkload_Execute"); + + // Executes the layer. + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..c2d6b7697a --- /dev/null +++ b/src/backends/ClWorkloads/ClMultiplicationFloatWorkload.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class ClMultiplicationFloatWorkload : public FloatWorkload +{ +public: + ClMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + + using FloatWorkload::FloatWorkload; + void Execute() const override; + +private: + mutable arm_compute::CLPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..d2625354ef --- /dev/null +++ b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.cpp @@ -0,0 +1,50 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClNormalizationFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo layerInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::CLNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +}; + +void ClNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..f02d0adb70 --- /dev/null +++ b/src/backends/ClWorkloads/ClNormalizationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class ClNormalizationFloatWorkload : public FloatWorkload +{ +public: + ClNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.cpp b/src/backends/ClWorkloads/ClPermuteWorkload.cpp new file mode 100644 index 0000000000..29d98bf0eb --- /dev/null +++ b/src/backends/ClWorkloads/ClPermuteWorkload.cpp @@ -0,0 +1,56 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClPermuteWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor) +{ + const armnn::PermutationVector& perm = descriptor.m_DimMappings; + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(!perm.IsEqual({ 0U, 3U, 1U, 2U }) + && !perm.IsEqual({ 0U, 2U, 3U, 1U }) + && !perm.IsEqual({ 3U, 2U, 0U, 1U }), + "Only [0, 3, 1, 2], [0, 2, 3, 1] and [3, 2, 0, 1] permutations are supported"); + + return arm_compute::Status{}; +} + +template +ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Run the layer. + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template +void ClPermuteWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class ClPermuteWorkload; +template class ClPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPermuteWorkload.hpp b/src/backends/ClWorkloads/ClPermuteWorkload.hpp new file mode 100644 index 0000000000..a1f3161921 --- /dev/null +++ b/src/backends/ClWorkloads/ClPermuteWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" +#include "backends/WorkloadData.hpp" + +#include +#include + +#include + +namespace armnn +{ + +arm_compute::Status ClPermuteWorkloadValidate(const PermuteDescriptor& descriptor); + +template +class ClPermuteWorkload : public TypedWorkload +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("ClPermuteWorkload"); + return name; + } + + ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload::m_Data; + mutable arm_compute::CLPermute m_PermuteFunction; +}; + +using ClPermuteFloatWorkload = ClPermuteWorkload; +using ClPermuteUint8Workload = ClPermuteWorkload; + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..a1ee50b39f --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
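// Sketch, not from the patch: the stripped template parameters on ClPermuteWorkload are
// presumably the ArmNN data types, which would make the explicit instantiations in the
// .cpp and the aliases in the .hpp above read:
template class ClPermuteWorkload<DataType::Float16, DataType::Float32>;
template class ClPermuteWorkload<DataType::QuantisedAsymm8>;

using ClPermuteFloatWorkload = ClPermuteWorkload<DataType::Float16, DataType::Float32>;
using ClPermuteUint8Workload = ClPermuteWorkload<DataType::QuantisedAsymm8>;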
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dBaseWorkload.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/ArmComputeUtils.hpp" +#include "backends/ArmComputeTensorUtils.hpp" + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::CLPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template +ClPooling2dBaseWorkload::ClPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + // Run the layer. + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class ClPooling2dBaseWorkload; +template class ClPooling2dBaseWorkload; + +} diff --git a/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..ea7ddfb41b --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dBaseWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template +class ClPooling2dBaseWorkload : public TypedWorkload +{ +public: + using TypedWorkload::m_Data; + + ClPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::CLPoolingLayer m_PoolingLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..dc9d17f0ae --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dFloatWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dFloatWorkload::ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dFloatWorkload") +{ +} + +void ClPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..71648d40f4 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dFloatWorkload.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ +class ClPooling2dFloatWorkload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..0b4b15f806 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dUint8Workload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClPooling2dUint8Workload.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClPooling2dUint8Workload::ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : ClPooling2dBaseWorkload(descriptor, info, "ClPooling2dUint8Workload") +{ +} + +void ClPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp b/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..2baf2aa708 --- /dev/null +++ b/src/backends/ClWorkloads/ClPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include "ClPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class ClPooling2dUint8Workload : public ClPooling2dBaseWorkload +{ +public: + ClPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..ea50436a66 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClReshapeFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClReshapeFloatWorkload::ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void ClReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp b/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..48265143e5 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClReshapeFloatWorkload : public FloatWorkload +{ +public: + ClReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp new file mode 100644 index 0000000000..82bd93ef9c --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeUint8Workload.cpp @@ -0,0 +1,31 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClReshapeUint8Workload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ +ClReshapeUint8Workload::ClReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) + : Uint8Workload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClReshapeUint8Workload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output); +} + +void ClReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeUint8Workload_Execute"); + + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp b/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp new file mode 100644 index 0000000000..c9801a3ae1 --- /dev/null +++ b/src/backends/ClWorkloads/ClReshapeUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +// Reshape +class ClReshapeUint8Workload : public Uint8Workload +{ +public: + ClReshapeUint8Workload( const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + void Execute() const override; + +private: + mutable arm_compute::CLReshapeLayer m_Layer; +}; + +} //namespace armnn + + diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp new file mode 100644 index 0000000000..8348afb76a --- /dev/null +++ b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.cpp @@ -0,0 +1,38 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClResizeBilinearFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" +#include "backends/ClLayerSupport.hpp" +#include "backends/ArmComputeUtils.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClResizeBilinearFloatWorkload::ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload(descriptor, info) +{ + m_Data.ValidateInputsOutputs("ClResizeBilinearFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + + m_ResizeBilinearLayer.configure(&input, &output, arm_compute::InterpolationPolicy::BILINEAR, + arm_compute::BorderMode::REPLICATE, arm_compute::PixelValue(0.f), + arm_compute::SamplingPolicy::TOP_LEFT); +}; + +void ClResizeBilinearFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeBilinearFloatWorkload_Execute"); + m_ResizeBilinearLayer.run(); +} + + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp new file mode 100644 index 0000000000..f2ee67f5dd --- /dev/null +++ b/src/backends/ClWorkloads/ClResizeBilinearFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "backends/Workload.hpp" + +#include + +namespace armnn +{ + +class ClResizeBilinearFloatWorkload : public FloatWorkload +{ +public: + ClResizeBilinearFloatWorkload(const ResizeBilinearQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::CLScale m_ResizeBilinearLayer; +}; + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..b4ea236d49 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxBaseWorkload.hpp" + +#include "backends/ArmComputeTensorUtils.hpp" + +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::CLSoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo); +} + +} diff --git a/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..b800056cdf --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +namespace armnn +{ + +arm_compute::Status ClSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +} // namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..c34b5a2a74 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.cpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ClSoftmaxFloatWorkload.hpp" +#include "backends/ClTensorHandle.hpp" +#include "backends/CpuTensorHandle.hpp" + +#include "ClWorkloadUtils.hpp" + +namespace armnn +{ + +ClSoftmaxFloatWorkload::ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr& memoryManager) + : FloatWorkload(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("ClSoftmaxFloatWorkload", 1, 1); + + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void ClSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..965b845cf8 --- /dev/null +++ b/src/backends/ClWorkloads/ClSoftmaxFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+
+class ClSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                           std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
new file mode 100644
index 0000000000..1bb9628d74
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.cpp
@@ -0,0 +1,43 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSoftmaxUint8Workload.hpp"
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+ClSoftmaxUint8Workload::ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                                               std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info)
+    , m_SoftmaxLayer(memoryManager)
+{
+    m_Data.ValidateInputsOutputs("ClSoftmaxUint8Workload", 1, 1);
+
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
+
+    const auto outputQuantization = output.info()->quantization_info();
+
+    if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0))
+    {
+        throw InvalidArgumentException(
+            "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported");
+    }
+
+    m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta);
+}
+
+void ClSoftmaxUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxUint8Workload_Execute");
+
+    m_SoftmaxLayer.run();
+}
+
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
new file mode 100644
index 0000000000..29427a5976
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSoftmaxUint8Workload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+
+#include <memory>
+
+namespace armnn
+{
+// Softmax
+class ClSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor>
+{
+public:
+    ClSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info,
+                           std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+
+    void Execute() const override;
+private:
+
+    mutable arm_compute::CLSoftmaxLayer m_SoftmaxLayer;
+};
+
+} //namespace armnn
+
diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp
new file mode 100644
index 0000000000..5fd634bdb6
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSplitterFloatWorkload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterFloatWorkload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterFloatWorkload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterFloatWorkload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp b/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp
new file mode 100644
index 0000000000..a0b5846f8e
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSplitterFloatWorkload.hpp
@@ -0,0 +1,20 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+
+class ClSplitterFloatWorkload : public ClBaseSplitterWorkload<DataType::Float16, DataType::Float32>
+{
+public:
+    using ClBaseSplitterWorkload::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp
new file mode 100644
index 0000000000..50a251ada7
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSplitterUint8Workload.cpp
@@ -0,0 +1,19 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSplitterUint8Workload.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+
+void ClSplitterUint8Workload::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterUint8Workload_Execute");
+    ClBaseSplitterWorkload::Execute();
+}
+
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp b/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp
new file mode 100644
index 0000000000..19e8be5034
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSplitterUint8Workload.hpp
@@ -0,0 +1,21 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "ClBaseSplitterWorkload.hpp"
+
+namespace armnn
+{
+class ClSplitterUint8Workload : public ClBaseSplitterWorkload<DataType::QuantisedAsymm8>
+{
+public:
+    using ClBaseSplitterWorkload::ClBaseSplitterWorkload;
+    virtual void Execute() const override;
+};
+} //namespace armnn
+
+
+
diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.cpp b/src/backends/ClWorkloads/ClSubtractionWorkload.cpp
new file mode 100644
index 0000000000..1c70130fa4
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSubtractionWorkload.cpp
@@ -0,0 +1,66 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "ClSubtractionWorkload.hpp"
+
+#include "backends/ClTensorHandle.hpp"
+#include "backends/CpuTensorHandle.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+
+#include "ClWorkloadUtils.hpp"
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+static constexpr arm_compute::ConvertPolicy g_AclConvertPolicy = arm_compute::ConvertPolicy::SATURATE;
+
+template <armnn::DataType... dataTypes>
+ClSubtractionWorkload<dataTypes...>::ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
+                                                           const WorkloadInfo& info)
+    : TypedWorkload<SubtractionQueueDescriptor, dataTypes...>(descriptor, info)
+{
+    this->m_Data.ValidateInputsOutputs("ClSubtractionWorkload", 2, 1);
+
+    arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[1])->GetTensor();
+    arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
+    m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy);
+}
+
+template <armnn::DataType... dataTypes>
+void ClSubtractionWorkload<dataTypes...>::Execute() const
+{
+    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+    m_Layer.run();
+}
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output,
+                           std::string* reasonIfUnsupported)
+{
+    const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0);
+    const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1);
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+
+    const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info,
+                                                                                         &aclInput1Info,
+                                                                                         &aclOutputInfo,
+                                                                                         g_AclConvertPolicy);
+
+    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
+    if (!supported && reasonIfUnsupported)
+    {
+        *reasonIfUnsupported = aclStatus.error_description();
+    }
+
+    return supported;
+}
+
+} //namespace armnn
+
+template class armnn::ClSubtractionWorkload<armnn::DataType::Float16, armnn::DataType::Float32>;
+template class armnn::ClSubtractionWorkload<armnn::DataType::QuantisedAsymm8>;
diff --git a/src/backends/ClWorkloads/ClSubtractionWorkload.hpp b/src/backends/ClWorkloads/ClSubtractionWorkload.hpp
new file mode 100644
index 0000000000..59a5f01e73
--- /dev/null
+++ b/src/backends/ClWorkloads/ClSubtractionWorkload.hpp
@@ -0,0 +1,31 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "backends/Workload.hpp"
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+namespace armnn
+{
+
+template <armnn::DataType... dataTypes>
+class ClSubtractionWorkload : public TypedWorkload<SubtractionQueueDescriptor, dataTypes...>
+{
+public:
+    ClSubtractionWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
+
+private:
+    mutable arm_compute::CLArithmeticSubtraction m_Layer;
+};
+
+bool ClSubtractionValidate(const TensorInfo& input0,
+                           const TensorInfo& input1,
+                           const TensorInfo& output,
+                           std::string* reasonIfUnsupported);
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/ClWorkloadUtils.hpp b/src/backends/ClWorkloads/ClWorkloadUtils.hpp
new file mode 100644
index 0000000000..6f1b155745
--- /dev/null
+++ b/src/backends/ClWorkloads/ClWorkloadUtils.hpp
@@ -0,0 +1,62 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "OpenClTimer.hpp"
+#include "backends/ArmComputeTensorUtils.hpp"
+#include "backends/CpuTensorHandle.hpp"
+
+#include <arm_compute/runtime/CL/CLFunctions.h>
+
+#define ARMNN_SCOPED_PROFILING_EVENT_CL(name) \
+    ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::GpuAcc, \
+                                                  name, \
+                                                  armnn::OpenClTimer(), \
+                                                  armnn::WallClockTimer())
+
+namespace armnn
+{
+
+template <typename T>
+void CopyArmComputeClTensorData(const T* srcData, arm_compute::CLTensor& dstTensor)
+{
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("MapClTensorForWriting");
+        dstTensor.map(true);
+    }
+
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("CopyToClTensor");
+        armcomputetensorutils::CopyArmComputeITensorData(srcData, dstTensor);
+    }
+
+    dstTensor.unmap();
+}
+
+template <typename T>
+void InitialiseArmComputeClTensorData(arm_compute::CLTensor& clTensor, const T* data)
+{
+    armcomputetensorutils::InitialiseArmComputeTensorEmpty(clTensor);
+    CopyArmComputeClTensorData(data, clTensor);
+}
+
+inline void InitializeArmComputeClTensorDataForFloatTypes(arm_compute::CLTensor& clTensor,
+                                                          const ConstCpuTensorHandle *handle)
+{
+    BOOST_ASSERT(handle);
+    switch(handle->GetTensorInfo().GetDataType())
+    {
+        case DataType::Float16:
+            InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor<Half>());
+            break;
+        case DataType::Float32:
+            InitialiseArmComputeClTensorData(clTensor, handle->GetConstTensor<float>());
+            break;
+        default:
+            BOOST_ASSERT_MSG(false, "Unexpected floating point type.");
+    }
+};
+
+} //namespace armnn
diff --git a/src/backends/ClWorkloads/backend.cmake b/src/backends/ClWorkloads/backend.cmake
new file mode 100644
index 0000000000..6f8eda1628
--- /dev/null
+++ b/src/backends/ClWorkloads/backend.cmake
@@ -0,0 +1,9 @@
+#
+# Copyright © 2017 Arm Ltd. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+
+if(ARMCOMPUTECL)
+    add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/ClWorkloads)
+    list(APPEND armnnLibraries armnnClBackend)
+endif()
--
cgit v1.2.1