Diffstat (limited to 'src/backends/neon')
84 files changed, 4387 insertions, 0 deletions
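This commit introduces the NEON (CpuAcc) backend: a layer-support oracle (NeonLayerSupport), ACL-backed tensor handles (NeonTensorHandle), a workload factory (NeonWorkloadFactory), and the per-layer workloads. For orientation before the diff itself, below is a minimal caller-side sketch of the support-query pattern these files implement; it is not part of the commit, and the tensor shape and ReLu descriptor are invented for illustration:

    // Minimal sketch (not part of this commit): asking the NEON backend
    // whether it can run a float32 ReLu activation. Shape is made up.
    #include <armnn/Tensor.hpp>
    #include <armnn/Descriptors.hpp>
    #include <backends/neon/NeonLayerSupport.hpp>

    bool CanRunReluOnNeon()
    {
        using namespace armnn;

        const TensorInfo input({1, 16, 16, 16}, DataType::Float32);
        const TensorInfo output(input); // activation preserves shape and type

        ActivationDescriptor desc;
        desc.m_Function = ActivationFunction::ReLu;

        std::string reason;
        const bool supported = IsActivationSupportedNeon(input, output, desc, &reason);
        // On a build without ARMCOMPUTENEON_ENABLED this returns false and
        // reason is set to "The armnn library has been built without NEON
        // support" (see IsNeonBackendSupported in NeonLayerSupport.cpp below).
        return supported;
    }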
diff --git a/src/backends/neon/CMakeLists.txt b/src/backends/neon/CMakeLists.txt new file mode 100644 index 0000000000..c748825464 --- /dev/null +++ b/src/backends/neon/CMakeLists.txt @@ -0,0 +1,28 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTENEON) + list(APPEND armnnNeonBackend_sources + NeonLayerSupport.cpp + NeonLayerSupport.hpp + NeonWorkloadFactory.cpp + NeonWorkloadFactory.hpp + NeonTensorHandle.hpp + ) + + add_subdirectory(workloads test) +else() + list(APPEND armnnNeonBackend_sources + NeonLayerSupport.cpp + NeonLayerSupport.hpp + NeonWorkloadFactory.cpp + NeonWorkloadFactory.hpp + ) +endif() + +add_library(armnnNeonBackend STATIC ${armnnNeonBackend_sources}) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnNeonBackend PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp new file mode 100644 index 0000000000..dfaea5c81c --- /dev/null +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -0,0 +1,468 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonLayerSupport.hpp" + +#include <LayerSupportCommon.hpp> +#include <InternalTypes.hpp> + +#include <armnn/Descriptors.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/core/ignore_unused.hpp> + +#ifdef ARMCOMPUTENEON_ENABLED +#include "workloads/NeonAdditionFloatWorkload.hpp" +#include "workloads/NeonActivationFloatWorkload.hpp" +#include "workloads/NeonBatchNormalizationFloatWorkload.hpp" +#include "workloads/NeonConvolution2dBaseWorkload.hpp" +#include "workloads/NeonDepthwiseConvolutionBaseWorkload.hpp" +#include "workloads/NeonL2NormalizationFloatWorkload.hpp" +#include "workloads/NeonMultiplicationFloatWorkload.hpp" +#include "workloads/NeonNormalizationFloatWorkload.hpp" +#include "workloads/NeonFullyConnectedWorkload.hpp" +#include "workloads/NeonPermuteWorkload.hpp" +#include "workloads/NeonPooling2dBaseWorkload.hpp" +#include "workloads/NeonSoftmaxBaseWorkload.hpp" +#include "workloads/NeonSubtractionFloatWorkload.hpp" +#endif + +using namespace boost; + +namespace armnn +{ + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) +{ + // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases, + // and complement with NEDirectConvolutionLayerKernel::configure() implementation. + + // Only 1x1 is using direct convolution. Performance results and details are in: + // https://jira.arm.com/browse/IVGCVSW-1003 + // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc + + const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32); + + // Strides: 1|2|3 + const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) && + (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3); + + auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value) + { + return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value || + conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value; + }; + + // Supported sizes and padding. + const bool sizeAndPaddingSupported = + // Pad > 0 not supported for 1x1 weights. 
+ (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u)); + + const bool preferDirectConvolution = dataTypeSupported && + strideSupported && + sizeAndPaddingSupported && + // NEDirectConvolutionLayerKernel doesn't support NULL bias. + desc.m_BiasEnabled; + return preferDirectConvolution; +} + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, const NormalizationDescriptor& parameters) +{ + if (parameters.m_NormMethodType != NormalizationAlgorithmMethod::LocalBrightness) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Unsupported normalisation method type, only LocalBrightness is supported"; + } + return false; + } + if (parameters.m_NormSize % 2 == 0) + { + if (reasonIfUnsupported) + { + *reasonIfUnsupported = "Normalization size must be an odd number."; + } + return false; + } + + return true; +} + +bool IsNeonBackendSupported(std::string* reasonIfUnsupported) +{ +#if ARMCOMPUTENEON_ENABLED + return true; +#else + if (reasonIfUnsupported != nullptr) + { + *reasonIfUnsupported = "The armnn library has been built without NEON support"; + } + return false; +#endif +} + +template<typename FloatFunc, typename Uint8Func, typename ... Params> +bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, + DataType dataType, + FloatFunc floatFuncPtr, + Uint8Func uint8FuncPtr, + Params&&... params) +{ + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + dataType, + floatFuncPtr, + floatFuncPtr, + uint8FuncPtr, + std::forward<Params>(params)...); +} + +#if ARMCOMPUTENEON_ENABLED +template<class FuncType, class... Args> +inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported, Args&&... args) +{ + arm_compute::Status aclStatus = func(std::forward<Args>(args)...); + const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK); + if (!supported && reasonIfUnsupported) + { + *reasonIfUnsupported = aclStatus.error_description(); + } + return supported; +} + +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \ + return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__); +#else +#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) 
\ + return IsNeonBackendSupported(reasonIfUnsupported); +#endif + +bool IsActivationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); +} + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); +} + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); +} + +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + // At the moment division is not supported + return false; +} + +bool IsSubtractionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + // At the moment U8 is unsupported + if (input.GetDataType() == DataType::QuantisedAsymm8) + { + return false; + } + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); +} + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, 
reasonIfUnsupported, input, output); +} + +bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + inputs[0]->GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); +} + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + output.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsResizeBilinearSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + return false; +} + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor); +} + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(descriptor); + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(descriptor); + return false; +} + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported) +{ + return IsSupportedForDataTypeNeon(reasonIfUnsupported, + input.GetDataType(), + &TrueFunc<>, + &TrueFunc<>); +} + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(output); + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& 
inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(outputStateIn); + ignore_unused(cellStateIn); + ignore_unused(scratchBuffer); + ignore_unused(outputStateOut); + ignore_unused(cellStateOut); + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(inputToForgetWeights); + ignore_unused(inputToCellWeights); + ignore_unused(inputToOutputWeights); + ignore_unused(recurrentToForgetWeights); + ignore_unused(recurrentToCellWeights); + ignore_unused(recurrentToOutputWeights); + ignore_unused(forgetGateBias); + ignore_unused(cellBias); + ignore_unused(outputGateBias); + ignore_unused(inputToInputWeights); + ignore_unused(recurrentToInputWeights); + ignore_unused(cellToInputWeights); + ignore_unused(inputGateBias); + ignore_unused(projectionWeights); + ignore_unused(projectionBias); + ignore_unused(cellToForgetWeights); + ignore_unused(cellToOutputWeights); + return false; +} + +bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; +} + +bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; +} + +bool IsMeanSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported) +{ + return false; +} + +} diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp new file mode 100644 index 0000000000..95b14b3ba6 --- /dev/null +++ b/src/backends/neon/NeonLayerSupport.hpp @@ -0,0 +1,163 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include <armnn/DescriptorsFwd.hpp> +#include <armnn/Types.hpp> +#include <armnn/Tensor.hpp> + +#include <boost/optional.hpp> + +namespace armnn +{ + +bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc); + +bool IsNeonNormalizationDescParamsSupported(std::string* reasonIfUnsupported, + const NormalizationDescriptor& parameters); + +bool IsActivationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor, + std::string* reasonIfUnsupported); + +bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, + const DepthwiseConvolution2dDescriptor& parameters, + const TensorInfo& weights); + +bool IsAdditionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported); + +bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsConstantSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvolution2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported = nullptr); + + +bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases, + std::string* reasonIfUnsupported = nullptr); + +bool IsDivisionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsSubtractionSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsInputSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, + const OriginsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsMultiplicationSupportedNeon(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsOutputSupportedNeon(const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsPermuteSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsPooling2dSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const 
Pooling2dDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsResizeBilinearSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsSplitterSupportedNeon(const TensorInfo& input, + const ViewsDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsFakeQuantizationSupportedNeon(const TensorInfo& input, + const FakeQuantizationDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +bool IsReshapeSupportedNeon(const TensorInfo& input, + std::string* reasonIfUnsupported = nullptr); + +bool IsFloorSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported = nullptr); + +bool IsMeanSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const MeanDescriptor& descriptor, + std::string* reasonIfUnsupported = nullptr); + +} diff --git a/src/backends/neon/NeonTensorHandle.hpp b/src/backends/neon/NeonTensorHandle.hpp new file mode 100644 index 0000000000..655427859b --- /dev/null +++ b/src/backends/neon/NeonTensorHandle.hpp @@ -0,0 +1,142 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/OutputHandler.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/runtime/MemoryGroup.h> +#include <arm_compute/runtime/IMemoryGroup.h> +#include <arm_compute/runtime/Tensor.h> +#include <arm_compute/runtime/SubTensor.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Coordinates.h> + +#include <boost/polymorphic_pointer_cast.hpp> + +namespace armnn +{ + +class INeonTensorHandle : public ITensorHandle +{ +public: + virtual arm_compute::ITensor& GetTensor() = 0; + virtual arm_compute::ITensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) = 0; +}; + +class NeonTensorHandle : public INeonTensorHandle +{ +public: + NeonTensorHandle(const TensorInfo& tensorInfo) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + NeonTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override + { + armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + }; + + virtual void Manage() override + { + BOOST_ASSERT(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override + { + m_MemoryGroup = boost::polymorphic_pointer_downcast<arm_compute::MemoryGroup>(memoryGroup); + } + + virtual const void* Map(bool /* blocking = true */) const override + { + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + arm_compute::Tensor m_Tensor; + std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; +}; + +class NeonSubTensorHandle : public INeonTensorHandle +{ +public: + NeonSubTensorHandle(INeonTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::ITensor& GetTensor() override { return m_Tensor; } + arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual ITensorHandle::Type GetType() const override { return ITensorHandle::Neon; } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} + + virtual const void* 
Map(bool /* blocking = true */) const override + { + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } +private: + arm_compute::SubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; +}; + +} diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp new file mode 100644 index 0000000000..dd91b152f2 --- /dev/null +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -0,0 +1,494 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NeonWorkloadFactory.hpp" +#include <armnn/Utils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <Layer.hpp> + +#ifdef ARMCOMPUTENEON_ENABLED +#include <arm_compute/runtime/Allocator.h> + +#include <backends/MemCopyWorkload.hpp> +#include "NeonTensorHandle.hpp" +#include "workloads/NeonWorkloadUtils.hpp" +#include "workloads/NeonWorkloads.hpp" + +#include <memory/IPoolManager.hpp> +#endif + +#include <backends/MakeWorkloadHelper.hpp> + +#include <boost/polymorphic_cast.hpp> + +namespace armnn +{ + +bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, + std::string& outReasonIfUnsupported) +{ + return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported); +} + +#ifdef ARMCOMPUTENEON_ENABLED + +NeonWorkloadFactory::NeonWorkloadFactory() + : m_MemoryManager(std::make_unique<arm_compute::Allocator>(), BaseMemoryManager::MemoryAffinity::Offset) +{ +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + BOOST_ASSERT(parent.GetType() == ITensorHandle::Neon); + + const arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); + + arm_compute::Coordinates coords; + coords.set_num_dimensions(subTensorShape.GetNumDimensions()); + for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) + { + // Arm compute indexes tensor coords in reverse order. 
+ unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; + coords.set(i, boost::numeric_cast<int>(subTensorOrigin[revertedIndex])); + } + + return std::make_unique<NeonSubTensorHandle>( + boost::polymorphic_downcast<INeonTensorHandle*>(&parent), shape, coords); +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + auto tensorHandle = std::make_unique<NeonTensorHandle>(tensorInfo, dataLayout); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonActivationFloatWorkload, NeonActivationUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonSoftmaxFloatWorkload, NeonSoftmaxUint8Workload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonSplitterFloatWorkload, NeonSplitterUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonMergerFloatWorkload, NeonMergerUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateFullyConnected( + const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonFullyConnectedWorkload, NeonFullyConnectedWorkload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonPermuteFloatWorkload, NeonPermuteUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonPooling2dFloatWorkload, NeonPooling2dUint8Workload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonConvolution2dFloatWorkload, NeonConvolution2dUint8Workload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<IWorkload> 
NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonDepthwiseConvolutionFloatWorkload, NeonDepthwiseConvolutionUint8Workload>( + descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateNormalization( + const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonNormalizationFloatWorkload, NullWorkload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonAdditionFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMultiplication( + const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonMultiplicationFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateDivision( + const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateSubtraction( + const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonSubtractionFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateBatchNormalization( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return MakeWorkload<NeonBatchNormalizationFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) + { + throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload"); + } + + return MakeWorkload<CopyMemGenericWorkload, CopyMemGenericWorkload>(descriptor, info); +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateResizeBilinear( + const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonL2NormalizationFloatWorkload, NullWorkload>(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonConstantFloatWorkload, NeonConstantUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonReshapeFloatWorkload, NeonReshapeUint8Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return 
MakeWorkload<NeonFloorFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NeonLstmFloatWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<NeonConvertFp16ToFp32Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique<NeonConvertFp32ToFp16Workload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload<NullWorkload, NullWorkload>(descriptor, info); +} + +void NeonWorkloadFactory::Finalize() +{ + m_MemoryManager.Finalize(); +} + +void NeonWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void NeonWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + +#else // Compiled without ArmCompute libs + +NeonWorkloadFactory::NeonWorkloadFactory() +{ +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const +{ + return nullptr; +} + +std::unique_ptr<ITensorHandle> NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + 
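Every stub in this "compiled without ArmCompute libs" block returns nullptr, so on such a build a NEON workload can never be instantiated; the runtime is expected to have excluded CpuAcc via the IsLayerSupported checks first. A hedged sketch of the caller-side consequence follows (the factory, descriptor, and info objects are assumed surrounding context, not shown in this diff):

    // Sketch only: with ARMCOMPUTENEON_ENABLED undefined, every Create*
    // call on NeonWorkloadFactory yields a null workload.
    std::unique_ptr<armnn::IWorkload> workload =
        neonFactory.CreateActivation(activationDescriptor, workloadInfo);
    if (workload == nullptr)
    {
        // CpuAcc cannot service this layer; assign it to another
        // backend (e.g. the reference backend) instead.
    }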
+std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateBatchNormalization(const BatchNormalizationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFakeQuantization( + const FakeQuantizationQueueDescriptor& descriptor, const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& data, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr<IWorkload> NeonWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +void NeonWorkloadFactory::Finalize() +{} + +void NeonWorkloadFactory::Release() +{} + +void 
NeonWorkloadFactory::Acquire() +{} + +#endif + +} //namespace armnn diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp new file mode 100644 index 0000000000..440bba672a --- /dev/null +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -0,0 +1,138 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/OutputHandler.hpp> + +#include <memory/BaseMemoryManager.hpp> + +#include <boost/core/ignore_unused.hpp> +#include <boost/optional.hpp> + +namespace armnn +{ + +// Neon workload factory. +class NeonWorkloadFactory : public IWorkloadFactory +{ +public: + NeonWorkloadFactory(); + + virtual Compute GetCompute() const override { return Compute::CpuAcc; } + + static bool IsLayerSupported(const Layer& layer, boost::optional<DataType> dataType, + std::string& outReasonIfUnsupported); + + virtual bool SupportsSubTensors() const override { return true; } + + virtual std::unique_ptr<ITensorHandle> CreateSubTensorHandle(ITensorHandle& parent, + TensorShape const& subTensorShape, + unsigned int const* subTensorOrigin) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo) const override; + + virtual std::unique_ptr<ITensorHandle> CreateTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout) const override; + + virtual std::unique_ptr<IWorkload> CreateInput(const InputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateOutput(const OutputQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateActivation(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSplitter(const SplitterQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMerger(const MergerQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePermute(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDepthwiseConvolution2d( + const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateNormalization(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateAddition(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateBatchNormalization(const BatchNormalizationQueueDescriptor& descriptor, + 
const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMemCopy(const MemCopyQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateResizeBilinear(const ResizeBilinearQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFakeQuantization(const FakeQuantizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConstant(const ConstantQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateReshape(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateFloor(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateDivision(const DivisionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateSubtraction(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual std::unique_ptr<IWorkload> CreateMean(const MeanQueueDescriptor& descriptor, + const WorkloadInfo& Info) const override; + + virtual std::unique_ptr<IWorkload> CreatePad(const PadQueueDescriptor& descriptor, + const WorkloadInfo& info) const override; + + virtual void Finalize() override; + + virtual void Release() override; + + virtual void Acquire() override; + +private: +#ifdef ARMCOMPUTENEON_ENABLED + mutable NeonMemoryManager m_MemoryManager; +#endif +}; + +} //namespace armnn diff --git a/src/backends/neon/backend.cmake b/src/backends/neon/backend.cmake new file mode 100644 index 0000000000..5f02c845ed --- /dev/null +++ b/src/backends/neon/backend.cmake @@ -0,0 +1,13 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# + +if(ARMCOMPUTENEON) + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/neon) + list(APPEND armnnLibraries armnnNeonBackend armnnNeonBackendWorkloads) +else() + message("NEON backend is disabled") + add_subdirectory(${PROJECT_SOURCE_DIR}/src/backends/neon) + list(APPEND armnnLibraries armnnNeonBackend) +endif() diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk new file mode 100644 index 0000000000..a59966fb39 --- /dev/null +++ b/src/backends/neon/backend.mk @@ -0,0 +1,47 @@ +# +# Copyright © 2017 ARM Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +# BACKEND_SOURCES contains the list of files to be included +# in the Android build and it is picked up by the Android.mk +# file in the root of ArmNN + +BACKEND_SOURCES := \ + NeonLayerSupport.cpp \ + NeonWorkloadFactory.cpp \ + workloads/NeonActivationFloatWorkload.cpp \ + workloads/NeonActivationUint8Workload.cpp \ + workloads/NeonAdditionFloatWorkload.cpp \ + workloads/NeonBatchNormalizationFloatWorkload.cpp \ + workloads/NeonConstantFloatWorkload.cpp \ + workloads/NeonConstantUint8Workload.cpp \ + workloads/NeonConvertFp16ToFp32Workload.cpp \ + workloads/NeonConvertFp32ToFp16Workload.cpp \ + workloads/NeonConvolution2dBaseWorkload.cpp \ + workloads/NeonConvolution2dFloatWorkload.cpp \ + workloads/NeonConvolution2dUint8Workload.cpp \ + workloads/NeonDepthwiseConvolutionBaseWorkload.cpp \ + workloads/NeonDepthwiseConvolutionFloatWorkload.cpp \ + workloads/NeonDepthwiseConvolutionUint8Workload.cpp \ + workloads/NeonFloorFloatWorkload.cpp \ + workloads/NeonFullyConnectedWorkload.cpp \ + workloads/NeonL2NormalizationFloatWorkload.cpp \ + workloads/NeonLstmFloatWorkload.cpp \ + workloads/NeonMergerFloatWorkload.cpp \ + workloads/NeonMergerUint8Workload.cpp \ + workloads/NeonMultiplicationFloatWorkload.cpp \ + workloads/NeonNormalizationFloatWorkload.cpp \ + workloads/NeonPermuteWorkload.cpp \ + workloads/NeonPooling2dBaseWorkload.cpp \ + workloads/NeonPooling2dFloatWorkload.cpp \ + workloads/NeonPooling2dUint8Workload.cpp \ + workloads/NeonReshapeFloatWorkload.cpp \ + workloads/NeonReshapeUint8Workload.cpp \ + workloads/NeonSoftmaxBaseWorkload.cpp \ + workloads/NeonSoftmaxFloatWorkload.cpp \ + workloads/NeonSoftmaxUint8Workload.cpp \ + workloads/NeonSplitterFloatWorkload.cpp \ + workloads/NeonSplitterUint8Workload.cpp \ + workloads/NeonSubtractionFloatWorkload.cpp \ + workloads/NeonWorkloadUtils.cpp diff --git a/src/backends/neon/test/CMakeLists.txt b/src/backends/neon/test/CMakeLists.txt new file mode 100644 index 0000000000..f41a074999 --- /dev/null +++ b/src/backends/neon/test/CMakeLists.txt @@ -0,0 +1,4 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. +# SPDX-License-Identifier: MIT +# diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt new file mode 100644 index 0000000000..850c65cb4e --- /dev/null +++ b/src/backends/neon/workloads/CMakeLists.txt @@ -0,0 +1,86 @@ +# +# Copyright © 2017 Arm Ltd. All rights reserved. 
+# SPDX-License-Identifier: MIT +# + +list(APPEND armnnNeonBackendWorkloads_sources + NeonActivationFloatWorkload.cpp + NeonActivationFloatWorkload.hpp + NeonActivationUint8Workload.cpp + NeonActivationUint8Workload.hpp + NeonAdditionFloatWorkload.cpp + NeonAdditionFloatWorkload.hpp + NeonBaseConstantWorkload.hpp + NeonBaseMergerWorkload.hpp + NeonBaseSplitterWorkload.hpp + NeonBatchNormalizationFloatWorkload.cpp + NeonBatchNormalizationFloatWorkload.hpp + NeonConstantFloatWorkload.cpp + NeonConstantFloatWorkload.hpp + NeonConstantUint8Workload.cpp + NeonConstantUint8Workload.hpp + NeonConvertFp16ToFp32Workload.cpp + NeonConvertFp16ToFp32Workload.hpp + NeonConvertFp32ToFp16Workload.cpp + NeonConvertFp32ToFp16Workload.hpp + NeonConvolution2dBaseWorkload.cpp + NeonConvolution2dBaseWorkload.hpp + NeonConvolution2dFloatWorkload.cpp + NeonConvolution2dFloatWorkload.hpp + NeonConvolution2dUint8Workload.cpp + NeonConvolution2dUint8Workload.hpp + NeonDepthwiseConvolutionBaseWorkload.cpp + NeonDepthwiseConvolutionBaseWorkload.hpp + NeonDepthwiseConvolutionFloatWorkload.cpp + NeonDepthwiseConvolutionFloatWorkload.hpp + NeonDepthwiseConvolutionUint8Workload.cpp + NeonDepthwiseConvolutionUint8Workload.hpp + NeonFloorFloatWorkload.cpp + NeonFloorFloatWorkload.hpp + NeonFullyConnectedWorkload.cpp + NeonFullyConnectedWorkload.hpp + NeonL2NormalizationFloatWorkload.cpp + NeonL2NormalizationFloatWorkload.hpp + NeonLstmFloatWorkload.cpp + NeonLstmFloatWorkload.hpp + NeonMergerFloatWorkload.cpp + NeonMergerFloatWorkload.hpp + NeonMergerUint8Workload.cpp + NeonMergerUint8Workload.hpp + NeonMultiplicationFloatWorkload.cpp + NeonMultiplicationFloatWorkload.hpp + NeonNormalizationFloatWorkload.cpp + NeonNormalizationFloatWorkload.hpp + NeonPermuteWorkload.cpp + NeonPermuteWorkload.hpp + NeonPooling2dBaseWorkload.cpp + NeonPooling2dBaseWorkload.hpp + NeonPooling2dFloatWorkload.cpp + NeonPooling2dFloatWorkload.hpp + NeonPooling2dUint8Workload.cpp + NeonPooling2dUint8Workload.hpp + NeonReshapeFloatWorkload.cpp + NeonReshapeFloatWorkload.hpp + NeonReshapeUint8Workload.cpp + NeonReshapeUint8Workload.hpp + NeonSoftmaxBaseWorkload.cpp + NeonSoftmaxBaseWorkload.hpp + NeonSoftmaxFloatWorkload.cpp + NeonSoftmaxFloatWorkload.hpp + NeonSoftmaxUint8Workload.cpp + NeonSoftmaxUint8Workload.hpp + NeonSplitterFloatWorkload.cpp + NeonSplitterFloatWorkload.hpp + NeonSplitterUint8Workload.cpp + NeonSplitterUint8Workload.hpp + NeonSubtractionFloatWorkload.cpp + NeonSubtractionFloatWorkload.hpp + NeonWorkloads.hpp + NeonWorkloadUtils.cpp + NeonWorkloadUtils.hpp +) + +add_library(armnnNeonBackendWorkloads STATIC ${armnnNeonBackendWorkloads_sources}) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnn) +target_include_directories(armnnNeonBackendWorkloads PRIVATE ${PROJECT_SOURCE_DIR}/src/armnnUtils) diff --git a/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp b/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp new file mode 100644 index 0000000000..1d6bf70431 --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationFloatWorkload.cpp @@ -0,0 +1,57 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> + + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(descriptor); + + if (input.GetDataType() == DataType::QuantisedAsymm8 && + activationLayerInfo.activation() == arm_compute::ActivationLayerInfo::ActivationFunction::LOGISTIC) + { + return arm_compute::Status{arm_compute::ErrorCode::RUNTIME_ERROR, + "Neon: Logistic Activations unsupported with QAsymm8 data type."}; + } + + return arm_compute::NEActivationLayer::validate(&aclInput, + &aclOutput, + activationLayerInfo); +} + +NeonActivationFloatWorkload::NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ActivationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonActivationFloatWorkload", 1, 1); + + const arm_compute::ActivationLayerInfo activationLayerInfo = + ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, activationLayerInfo); +} + +void NeonActivationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationFloatWorkload_Execute"); + m_ActivationLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp b/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp new file mode 100644 index 0000000000..4d2f51fb4f --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonActivationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const ActivationDescriptor& descriptor); + +class NeonActivationFloatWorkload : public FloatWorkload<ActivationQueueDescriptor> +{ +public: + NeonActivationFloatWorkload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonActivationUint8Workload.cpp b/src/backends/neon/workloads/NeonActivationUint8Workload.cpp new file mode 100644 index 0000000000..4aed6b510f --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonActivationUint8Workload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +namespace armnn +{ +NeonActivationUint8Workload::NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ActivationQueueDescriptor>(descriptor, info) +{ + auto activation = ConvertActivationFunctionToAclActivationFunction(m_Data.m_Parameters.m_Function); + arm_compute::ActivationLayerInfo layerInfo(activation, + m_Data.m_Parameters.m_A, + m_Data.m_Parameters.m_B); + + m_Data.ValidateInputsOutputs("NeonActivationUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<NeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<NeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_ActivationLayer.configure(&input, &output, layerInfo); +} + +void NeonActivationUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationUint8Workload_Execute"); + + m_ActivationLayer.run(); +} +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonActivationUint8Workload.hpp b/src/backends/neon/workloads/NeonActivationUint8Workload.hpp new file mode 100644 index 0000000000..56e3544379 --- /dev/null +++ b/src/backends/neon/workloads/NeonActivationUint8Workload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonActivationUint8Workload : public Uint8Workload<ActivationQueueDescriptor> +{ +public: + NeonActivationUint8Workload(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEActivationLayer m_ActivationLayer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp b/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp new file mode 100644 index 0000000000..445e32ea44 --- /dev/null +++ b/src/backends/neon/workloads/NeonAdditionFloatWorkload.cpp @@ -0,0 +1,48 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonAdditionFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticAddition::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + + +NeonAdditionFloatWorkload::NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<AdditionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonAdditionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_AddLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonAdditionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionFloatWorkload_Execute"); + m_AddLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp b/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp new file mode 100644 index 0000000000..769492e949 --- /dev/null +++ b/src/backends/neon/workloads/NeonAdditionFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonAdditionFloatWorkload : public FloatWorkload<AdditionQueueDescriptor> +{ +public: + NeonAdditionFloatWorkload(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticAddition m_AddLayer; +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp b/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp new file mode 100644 index 0000000000..6bb275ac13 --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseConstantWorkload.hpp @@ -0,0 +1,82 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <arm_compute/core/Types.h> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/Workload.hpp> +#include <Half.hpp> + +#include <boost/cast.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Constant layer common to all data types. +template <armnn::DataType... 
DataFormats> +class NeonBaseConstantWorkload : public TypedWorkload<ConstantQueueDescriptor, DataFormats...> +{ +public: + NeonBaseConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) + : TypedWorkload<ConstantQueueDescriptor, DataFormats...>(descriptor, info) + , m_RanOnce(false) + { + } + + virtual void Execute() const override + { + using namespace armcomputetensorutils; + + // The intermediate tensor held by the corresponding layer output handler can be initialised with the + // given data on the first inference, then reused for subsequent inferences. + // The initialisation cannot happen at workload construction time since the ACL kernel for the next layer + // may not have been configured at the time. + if (!m_RanOnce) + { + const ConstantQueueDescriptor& data = this->m_Data; + + BOOST_ASSERT(data.m_LayerOutput != nullptr); + arm_compute::ITensor& output = + boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetTensor(); + arm_compute::DataType computeDataType = + boost::polymorphic_downcast<NeonTensorHandle*>(data.m_Outputs[0])->GetDataType(); + + switch (computeDataType) + { + case arm_compute::DataType::F16: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<Half>(), output); + break; + } + case arm_compute::DataType::F32: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<float>(), output); + break; + } + case arm_compute::DataType::QASYMM8: + { + CopyArmComputeITensorData(data.m_LayerOutput->GetConstTensor<uint8_t>(), output); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown data type"); + break; + } + } + + m_RanOnce = true; + } + } + +private: + mutable bool m_RanOnce; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp b/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp new file mode 100644 index 0000000000..9ff09f6c7c --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseMergerWorkload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/Workload.hpp> + +namespace armnn +{ +// Base class template providing an implementation of the Merger layer common to all data types. +template <armnn::DataType... DataTypes> +class NeonBaseMergerWorkload : public TypedWorkload<MergerQueueDescriptor, DataTypes...> +{ +public: + using TypedWorkload<MergerQueueDescriptor, DataTypes...>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, merger is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp b/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp new file mode 100644 index 0000000000..dcee93363d --- /dev/null +++ b/src/backends/neon/workloads/NeonBaseSplitterWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +// Base class template providing an implementation of the Splitter layer common to all data types. +template <armnn::DataType... 
DataTypes> +class NeonBaseSplitterWorkload : public TypedWorkload<SplitterQueueDescriptor, DataTypes...> +{ +public: + using TypedWorkload<SplitterQueueDescriptor, DataTypes...>::TypedWorkload; + + virtual void Execute() const override + { + // With subtensors, splitter is a no-op. + } +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..2383e78df3 --- /dev/null +++ b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.cpp @@ -0,0 +1,96 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <armnn/ArmNN.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclMeanInfo = BuildArmComputeTensorInfo(mean); + const arm_compute::TensorInfo aclVarInfo = BuildArmComputeTensorInfo(var); + const arm_compute::TensorInfo aclBetaInfo = BuildArmComputeTensorInfo(beta); + const arm_compute::TensorInfo aclGammaInfo = BuildArmComputeTensorInfo(gamma); + + return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo, + &aclOutputInfo, + &aclMeanInfo, + &aclVarInfo, + &aclBetaInfo, + &aclGammaInfo, + descriptor.m_Eps); +} + +NeonBatchNormalizationFloatWorkload::NeonBatchNormalizationFloatWorkload( + const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) + : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonBatchNormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Mean = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo()); + + m_Variance = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Variance, m_Data.m_Variance->GetTensorInfo()); + + m_Gamma = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Gamma, m_Data.m_Gamma->GetTensorInfo()); + + m_Beta = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo()); + + m_Layer.configure(&input, + &output, + m_Mean.get(), + m_Variance.get(), + m_Beta.get(), + m_Gamma.get(), + m_Data.m_Parameters.m_Eps); + + InitializeArmComputeTensorDataForFloatTypes(*m_Mean, m_Data.m_Mean); + InitializeArmComputeTensorDataForFloatTypes(*m_Variance, m_Data.m_Variance); + InitializeArmComputeTensorDataForFloatTypes(*m_Gamma, m_Data.m_Gamma); + InitializeArmComputeTensorDataForFloatTypes(*m_Beta, m_Data.m_Beta); + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_Layer.prepare(); + FreeUnusedTensors(); +} + 
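Annotation (not patch content): each workload in this patch exposes a free ...Validate() function alongside the class so that support can be checked before any workload is constructed. A minimal, hypothetical call-site sketch — the TensorInfo arguments are placeholders, and the error_code()/error_description() accessors on arm_compute::Status are assumed:

    arm_compute::Status status = NeonBatchNormalizationValidate(input, output, mean, var,
                                                                beta, gamma, descriptor);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // Not supported on Neon for these tensors; status.error_description() carries
        // the reason, and the caller can fall back to another backend.
    }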
+void NeonBatchNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +void NeonBatchNormalizationFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_Mean); + FreeTensorIfUnused(m_Variance); + FreeTensorIfUnused(m_Gamma); + FreeTensorIfUnused(m_Beta); +} + +} //namespace armnn + + diff --git a/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..59c7404c44 --- /dev/null +++ b/src/backends/neon/workloads/NeonBatchNormalizationFloatWorkload.hpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, + const BatchNormalizationDescriptor& descriptor); + +class NeonBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor> +{ +public: + NeonBatchNormalizationFloatWorkload(const BatchNormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEBatchNormalizationLayer m_Layer; + + std::unique_ptr<arm_compute::Tensor> m_Mean; + std::unique_ptr<arm_compute::Tensor> m_Variance; + std::unique_ptr<arm_compute::Tensor> m_Gamma; + std::unique_ptr<arm_compute::Tensor> m_Beta; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp b/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp new file mode 100644 index 0000000000..dbdd057101 --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConstantFloatWorkload.hpp" + +namespace armnn +{ + +void NeonConstantFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantFloatWorkload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp b/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp new file mode 100644 index 0000000000..c35b5fda3e --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantFloatWorkload : public NeonBaseConstantWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseConstantWorkload<DataType::Float16, DataType::Float32>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantUint8Workload.cpp b/src/backends/neon/workloads/NeonConstantUint8Workload.cpp new file mode 100644 index 0000000000..c607d86844 --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonConstantUint8Workload.hpp" + +namespace armnn +{ + +void NeonConstantUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantUint8Workload_Execute"); + NeonBaseConstantWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConstantUint8Workload.hpp b/src/backends/neon/workloads/NeonConstantUint8Workload.hpp new file mode 100644 index 0000000000..2cb9516afe --- /dev/null +++ b/src/backends/neon/workloads/NeonConstantUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseConstantWorkload.hpp" + +namespace armnn +{ + +class NeonConstantUint8Workload : public NeonBaseConstantWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseConstantWorkload<DataType::QuantisedAsymm8>::NeonBaseConstantWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp new file mode 100644 index 0000000000..86ec31c71d --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor>(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp16ToFp32Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp16ToFp32Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast<const Half*>(src); + auto output = reinterpret_cast<float*>(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat16To32(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp new file mode 100644 index 0000000000..dcf6998c64 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload<ConvertFp16ToFp32QueueDescriptor> +{ +public: + NeonConvertFp16ToFp32Workload(const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp new file mode 100644 index 0000000000..0f4fbe4e93 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvertFp32ToFp16Workload.hpp" + +#include "Half.hpp" +#include "FloatingPointConverter.hpp" + +#include "Profiling.hpp" +#include "backends/WorkloadUtils.hpp" + +namespace armnn +{ + +NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) + : Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor>(descriptor, info) +{ + this->m_Data.ValidateInputsOutputs("NeonConvertFp32ToFp16Workload", 1, 1); + GatherTensorHandlePairs(descriptor, m_TensorHandlePairs); +} + +void NeonConvertFp32ToFp16Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + + auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) + { + auto input = reinterpret_cast<const float*>(src); + auto output = reinterpret_cast<Half*>(dst); + size_t numElements = size/2; // 2 bytes per fp16 + armnnUtils::FloatingPointConverter::ConvertFloat32To16(input, numElements, output); + }; + + for (const auto& pair : m_TensorHandlePairs) + { + CopyTensorContentsGeneric(pair.first, pair.second, convertFunc); + } +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp new file mode 100644 index 0000000000..b819a8c542 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload<ConvertFp32ToFp16QueueDescriptor> +{ +public: + NeonConvertFp32ToFp16Workload(const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + using TensorHandlePair = std::pair<const ITensorHandle*, ITensorHandle*>; + std::vector<TensorHandlePair> m_TensorHandlePairs; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp new file mode 100644 index 0000000000..547f563d59 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.cpp @@ -0,0 +1,146 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include <armnn/Types.hpp> +#include <Half.hpp> + +namespace armnn +{ + +using namespace armcomputetensorutils; + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + + return arm_compute::NEConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + layerInfo); +} + +template<armnn::DataType... dataTypes> +NeonConvolution2dBaseWorkload<dataTypes...>::NeonConvolution2dBaseWorkload( + const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>(descriptor, info) +{ + using arm_compute::NEDirectConvolutionLayer; + + ValidateData(); + + // todo: check tensor shapes match. 
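// Annotation (not patch content): the remainder of this constructor
//  - downcasts the ArmNN handles to INeonTensorHandle to reach the underlying arm_compute::ITensor,
//  - builds staging tensors for the kernel and, when m_BiasEnabled is set, the bias,
//  - then asks IsNeonDirectConvolutionPreferred() whether to configure NEDirectConvolutionLayer
//    or the general NEConvolutionLayer; either way the chosen function is held through the
//    arm_compute::IFunction pointer m_ConvolutionLayer.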
+ + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, m_Data.m_Weight->GetTensorInfo(), descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + const bool preferDirectConvolution = + IsNeonDirectConvolutionPreferred(m_Data.m_Weight->GetTensorInfo(), + m_Data.m_Parameters); + + if (preferDirectConvolution) + { + auto directConvolutionLayer = std::make_unique<arm_compute::NEDirectConvolutionLayer>(memoryManager); + directConvolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(directConvolutionLayer.release()); + } + else + { + auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager); + convolutionLayer->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + m_ConvolutionLayer.reset(convolutionLayer.release()); + } + BOOST_ASSERT(m_ConvolutionLayer); + + armnn::DataType dataType = m_Data.m_Weight->GetTensorInfo().GetDataType(); + + switch (dataType) + { + case DataType::Float16: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<Half>()); + break; + } + case DataType::Float32: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<float>()); + break; + } + case DataType::QuantisedAsymm8: + { + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->template GetConstTensor<uint8_t>()); + break; + } + default: + { + BOOST_ASSERT_MSG(false, "Unknown DataType."); + } + } +} + +template<armnn::DataType... dataTypes> +void NeonConvolution2dBaseWorkload<dataTypes...>::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +// Generates known implementations for linker. +template class NeonConvolution2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>; +template class NeonConvolution2dBaseWorkload<armnn::DataType::QuantisedAsymm8>; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp new file mode 100644 index 0000000000..6af89c1f01 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dBaseWorkload.hpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <backends/Workload.hpp> + +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <boost/optional.hpp> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Convolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +template<armnn::DataType... dataTypes> +class NeonConvolution2dBaseWorkload : public TypedWorkload<Convolution2dQueueDescriptor, dataTypes...> +{ +public: + using TypedWorkload<Convolution2dQueueDescriptor, dataTypes...>::m_Data; + + NeonConvolution2dBaseWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + virtual void ValidateData() const {}; + +protected: + std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp new file mode 100644 index 0000000000..cd26f8d536 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.cpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dFloatWorkload.hpp" +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/neon/NeonLayerSupport.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonConvolution2dFloatWorkload::NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dFloatWorkload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dFloatWorkload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dFloatWorkload", 1, 1); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp new file mode 100644 index 0000000000..14c77c8bd0 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include <memory> + +namespace armnn +{ + +class NeonConvolution2dFloatWorkload : public NeonConvolution2dBaseWorkload<DataType::Float16, DataType::Float32> +{ +public: + NeonConvolution2dFloatWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + void Execute() const override; + void ValidateData() const override; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp new file mode 100644 index 0000000000..5affe682b4 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.cpp @@ -0,0 +1,35 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonConvolution2dUint8Workload.hpp" + +namespace armnn +{ + +NeonConvolution2dUint8Workload::NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : NeonConvolution2dBaseWorkload(descriptor, info, memoryManager) +{ + if (m_Data.m_Parameters.m_BiasEnabled) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->template GetConstTensor<int32_t>()); + } + + m_ConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonConvolution2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvolution2dUint8Workload_Execute"); + m_ConvolutionLayer->run(); +} + +void NeonConvolution2dUint8Workload::ValidateData() const +{ + m_Data.ValidateInputsOutputs("NeonConvolution2dUint8Workload", 1, 1); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp new file mode 100644 index 0000000000..ef60fc3e84 --- /dev/null +++ b/src/backends/neon/workloads/NeonConvolution2dUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonConvolution2dBaseWorkload.hpp" + +#include "arm_compute/runtime/MemoryManagerOnDemand.h" + +#include <memory> + +namespace armnn +{ + +class NeonConvolution2dUint8Workload : public NeonConvolution2dBaseWorkload<DataType::QuantisedAsymm8> +{ +public: + NeonConvolution2dUint8Workload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + + virtual void ValidateData() const override; + virtual void Execute() const override; +private: +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp new file mode 100644 index 0000000000..ef60b3238d --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.cpp @@ -0,0 +1,49 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionBaseWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases) +{ + const arm_compute::TensorInfo aclInputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclWeightsInfo = + armcomputetensorutils::BuildArmComputeTensorInfo(weights, descriptor.m_DataLayout); + + arm_compute::TensorInfo aclBiasesInfo; + arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + + if (descriptor.m_BiasEnabled) + { + BOOST_ASSERT(biases.is_initialized()); + + aclBiasesInfo = armcomputetensorutils::BuildArmComputeTensorInfo(biases.get(), descriptor.m_DataLayout); + optionalAclBiasesInfo = &aclBiasesInfo; + } + + const arm_compute::PadStrideInfo aclPadStrideInfo = + armcomputetensorutils::BuildArmComputePadStrideInfo(descriptor); + const unsigned int aclDepthMultiplier = weights.GetShape()[0]; + + return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, + &aclWeightsInfo, + optionalAclBiasesInfo, + &aclOutputInfo, + aclPadStrideInfo, + aclDepthMultiplier); +} + +}
\ No newline at end of file diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp new file mode 100644 index 0000000000..982992a363 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionBaseWorkload.hpp @@ -0,0 +1,21 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <boost/optional.hpp> + +namespace armnn +{ + +arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const DepthwiseConvolution2dDescriptor& descriptor, + const TensorInfo& weights, + const boost::optional<TensorInfo>& biases); + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp new file mode 100644 index 0000000000..742a768b94 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionFloatWorkload::NeonDepthwiseConvolutionFloatWorkload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionFloatWorkload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + 
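// Annotation (not patch content): use3x3Optimisation above picks ACL's specialised
// NEDepthwiseConvolutionLayer3x3 when dimensions 2 and 3 of the weight shape (taken here to
// be the filter's spatial extent) are both 3, and the generic NEDepthwiseConvolutionLayer
// otherwise; both are held through the arm_compute::IFunction base, which is why configure()
// is reached via static_cast.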
InitializeArmComputeTensorDataForFloatTypes(*m_KernelTensor, m_Data.m_Weight); + + if (m_BiasTensor) + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasTensor, m_Data.m_Bias); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionFloatWorkload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionFloatWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp new file mode 100644 index 0000000000..0109ea10cb --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonDepthwiseConvolutionFloatWorkload : public FloatWorkload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionFloatWorkload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp new file mode 100644 index 0000000000..722b778eba --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.cpp @@ -0,0 +1,93 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +NeonDepthwiseConvolutionUint8Workload::NeonDepthwiseConvolutionUint8Workload( + const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info) +{ + const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo(); + + m_KernelTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_KernelTensor, weightInfo, descriptor.m_DataLayout); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasTensor, m_Data.m_Bias->GetTensorInfo(), descriptor.m_DataLayout); + } + + arm_compute::PadStrideInfo padStrideInfo(m_Data.m_Parameters.m_StrideX, + m_Data.m_Parameters.m_StrideY, + m_Data.m_Parameters.m_PadLeft, + m_Data.m_Parameters.m_PadRight, + m_Data.m_Parameters.m_PadTop, + m_Data.m_Parameters.m_PadBottom, + arm_compute::DimensionRoundingType::FLOOR); + + m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionUint8Workload", 1, 1); + + arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + bool use3x3Optimisation = weightInfo.GetShape()[3] == 3 && weightInfo.GetShape()[2] == 3; + if (use3x3Optimisation) + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer3x3>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer3x3*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + else + { + m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>(); + static_cast<arm_compute::NEDepthwiseConvolutionLayer*>( + m_pDepthwiseConvolutionLayer.get())->configure(&input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo); + } + + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + InitialiseArmComputeTensorData(*m_KernelTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + + if (m_BiasTensor) + { + InitialiseArmComputeTensorData(*m_BiasTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); + } + + m_pDepthwiseConvolutionLayer->prepare(); + FreeUnusedTensors(); +} + +void NeonDepthwiseConvolutionUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionUint8Workload_Execute"); + BOOST_ASSERT(m_pDepthwiseConvolutionLayer); + + m_pDepthwiseConvolutionLayer->run(); +} + +void NeonDepthwiseConvolutionUint8Workload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_KernelTensor); + FreeTensorIfUnused(m_BiasTensor); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp new file mode 100644 index 0000000000..90cf8b0091 --- /dev/null +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionUint8Workload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonDepthwiseConvolutionUint8Workload : public Uint8Workload<DepthwiseConvolution2dQueueDescriptor> +{ +public: + NeonDepthwiseConvolutionUint8Workload(const DepthwiseConvolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable std::unique_ptr<arm_compute::IFunction> m_pDepthwiseConvolutionLayer; + + std::unique_ptr<arm_compute::Tensor> m_KernelTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp new file mode 100644 index 0000000000..a08ba8a6ec --- /dev/null +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonFloorFloatWorkload.hpp" + +namespace armnn +{ +NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<FloorQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonFloorFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonFloorFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); + m_Layer.run(); +} +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp new file mode 100644 index 0000000000..478aa94ca4 --- /dev/null +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonFloorFloatWorkload : public FloatWorkload<FloorQueueDescriptor> +{ +public: + NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFloor m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp new file mode 100644 index 0000000000..8cebb4f48f --- /dev/null +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp @@ -0,0 +1,110 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonFullyConnectedWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); + const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); + + arm_compute::TensorInfo aclBiases; + arm_compute::TensorInfo *optionalAclBiases = nullptr; + if (descriptor.m_BiasEnabled) + { + aclBiases = BuildArmComputeTensorInfo(biases); + optionalAclBiases = &aclBiases; + } + + const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + + + return arm_compute::NEFullyConnectedLayer::validate(&aclInput, + &aclWeights, + optionalAclBiases, + &aclOutput, + fullyConnectedLayerInfo); +} + +NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info) + , m_FullyConnectedLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_WeightsTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo()); + + if (m_Data.m_Parameters.m_BiasEnabled) + { + m_BiasesTensor = std::make_unique<arm_compute::Tensor>(); + BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo()); + } + + // Construct + arm_compute::FullyConnectedLayerInfo fc_info; + fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); + + // Allocate + if (m_Data.m_Weight->GetTensorInfo().GetDataType() == DataType::QuantisedAsymm8) + { + InitialiseArmComputeTensorData(*m_WeightsTensor, m_Data.m_Weight->GetConstTensor<uint8_t>()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_WeightsTensor, m_Data.m_Weight); + } + + if (m_BiasesTensor) + { + if (m_Data.m_Bias->GetTensorInfo().GetDataType() == DataType::Signed32) + { + InitialiseArmComputeTensorData(*m_BiasesTensor, m_Data.m_Bias->GetConstTensor<int32_t>()); + } + else + { + InitializeArmComputeTensorDataForFloatTypes(*m_BiasesTensor, m_Data.m_Bias); + } + } + + // Force Compute Library to perform the necessary copying and reshaping, after which + // delete all the input tensors that will no longer be needed + m_FullyConnectedLayer.prepare(); + FreeUnusedTensors(); +} + +void NeonFullyConnectedWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute"); + m_FullyConnectedLayer.run(); +} + +void NeonFullyConnectedWorkload::FreeUnusedTensors() +{ + FreeTensorIfUnused(m_WeightsTensor); + 
FreeTensorIfUnused(m_BiasesTensor); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp new file mode 100644 index 0000000000..9ffac96a86 --- /dev/null +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp @@ -0,0 +1,40 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, + const FullyConnectedDescriptor& descriptor); + +class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor> +{ +public: + NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NEFullyConnectedLayer m_FullyConnectedLayer; + + std::unique_ptr<arm_compute::Tensor> m_WeightsTensor; + std::unique_ptr<arm_compute::Tensor> m_BiasesTensor; + + void FreeUnusedTensors(); +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..dee789af85 --- /dev/null +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp @@ -0,0 +1,42 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonL2NormalizationFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + CreateAclNormalizationLayerInfoForL2Normalization(input); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info) + , m_Layer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + m_Layer.configure(&input, &output, CreateAclNormalizationLayerInfoForL2Normalization(info.m_InputTensorInfos[0])); +} + +void NeonL2NormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp new file mode 100644 index 
0000000000..c1221fb98c --- /dev/null +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +arm_compute::Status NeonL2NormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output); + +class NeonL2NormalizationFloatWorkload : public FloatWorkload<L2NormalizationQueueDescriptor> +{ +public: + NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + // Purposely not a NEL2Normalize function. See constructor. + mutable arm_compute::NENormalizationLayer m_Layer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp new file mode 100644 index 0000000000..8b2b58d9b1 --- /dev/null +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp @@ -0,0 +1,22 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonLstmFloatWorkload.hpp" + +namespace armnn +{ +NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<LstmQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonLstmFloatWorkload", 1, 1); +} + +void NeonLstmFloatWorkload::Execute() const +{ + throw armnn::Exception("No implementation of Lstm in the Neon backend!"); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp new file mode 100644 index 0000000000..4a5394f0a0 --- /dev/null +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonLstmFloatWorkload : public FloatWorkload<LstmQueueDescriptor> +{ +public: + NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp b/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp new file mode 100644 index 0000000000..79039aa51a --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerFloatWorkload.hpp" + +namespace armnn +{ + +void NeonMergerFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerFloatWorkload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp b/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp new file mode 100644 index 0000000000..e7088b8c2f --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerFloatWorkload : public NeonBaseMergerWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseMergerWorkload<DataType::Float16, DataType::Float32>::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerUint8Workload.cpp b/src/backends/neon/workloads/NeonMergerUint8Workload.cpp new file mode 100644 index 0000000000..3989702bd3 --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMergerUint8Workload.hpp" + +namespace armnn +{ + +void NeonMergerUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerUint8Workload_Execute"); + NeonBaseMergerWorkload::Execute(); +} + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonMergerUint8Workload.hpp b/src/backends/neon/workloads/NeonMergerUint8Workload.hpp new file mode 100644 index 0000000000..73c0fd55ad --- /dev/null +++ b/src/backends/neon/workloads/NeonMergerUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseMergerWorkload.hpp" + +namespace armnn +{ + +class NeonMergerUint8Workload : public NeonBaseMergerWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseMergerWorkload<DataType::QuantisedAsymm8>::NeonBaseMergerWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp new file mode 100644 index 0000000000..c4241ece19 --- /dev/null +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonMultiplicationFloatWorkload.hpp" + + +namespace armnn +{ + +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. 
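+ // Per the ACL documentation, NEPixelWiseMultiplication computes out[i] = input1[i] * input2[i] * scale, + // so the fixed scale of 1.0f used here makes the call a plain element-wise product.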
+ return arm_compute::NEPixelWiseMultiplication::validate(&aclInput1, + &aclInput2, + &aclOutput, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +NeonMultiplicationFloatWorkload::NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<MultiplicationQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonMultiplicationFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, + // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be + // ignored for F32 tensors. + m_PixelWiseMultiplication.configure(&input1, + &input2, + &output, + 1.0f, + arm_compute::ConvertPolicy::SATURATE, + arm_compute::RoundingPolicy::TO_ZERO); +} + +void NeonMultiplicationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationFloatWorkload_Execute"); + m_PixelWiseMultiplication.run(); +} + +} //namespace armnn + + diff --git a/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp new file mode 100644 index 0000000000..0a99c8cedc --- /dev/null +++ b/src/backends/neon/workloads/NeonMultiplicationFloatWorkload.hpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ +arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonMultiplicationFloatWorkload : public FloatWorkload<MultiplicationQueueDescriptor> +{ +public: + NeonMultiplicationFloatWorkload(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEPixelWiseMultiplication m_PixelWiseMultiplication; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp new file mode 100644 index 0000000000..472c75f222 --- /dev/null +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp @@ -0,0 +1,70 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonNormalizationFloatWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInput = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::NormalizationLayerInfo normalizationInfo = + armcomputetensorutils::BuildArmComputeNormalizationLayerInfo(descriptor); + + return arm_compute::NENormalizationLayer::validate(&aclInput, &aclOutput, normalizationInfo); +} + +NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<NormalizationQueueDescriptor>(descriptor, info) + , m_NormalizationLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); + std::string reasonIfUnsupported; + if (!IsNeonNormalizationDescParamsSupported(&reasonIfUnsupported, m_Data.m_Parameters)) + { + throw UnimplementedException(reasonIfUnsupported); + } + + // Input and output tensors have to have the same shape. + if (info.m_InputTensorInfos[0].GetShape()[0] != info.m_OutputTensorInfos[0].GetShape()[0] + || info.m_InputTensorInfos[0].GetShape()[1] != info.m_OutputTensorInfos[0].GetShape()[1] + || info.m_InputTensorInfos[0].GetShape()[2] != info.m_OutputTensorInfos[0].GetShape()[2] + || info.m_InputTensorInfos[0].GetShape()[3] != info.m_OutputTensorInfos[0].GetShape()[3]) + { + throw InvalidArgumentException("Normalization requires input and output tensors to have the same shape."); + } + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const arm_compute::NormType normType = + ConvertNormalizationAlgorithmChannelToAclNormType(m_Data.m_Parameters.m_NormChannelType); + arm_compute::NormalizationLayerInfo normalizationInfo(normType, + m_Data.m_Parameters.m_NormSize, + m_Data.m_Parameters.m_Alpha, + m_Data.m_Parameters.m_Beta, + m_Data.m_Parameters.m_K, + false); + + m_NormalizationLayer.configure(&input, &output, normalizationInfo); +} + +void NeonNormalizationFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); + m_NormalizationLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp new file mode 100644 index 0000000000..c6f64c6c15 --- /dev/null +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp @@ -0,0 +1,33 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +namespace armnn +{ + +arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const NormalizationDescriptor& descriptor); + +class NeonNormalizationFloatWorkload : public FloatWorkload<NormalizationQueueDescriptor> +{ +public: + NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NENormalizationLayer m_NormalizationLayer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.cpp b/src/backends/neon/workloads/NeonPermuteWorkload.cpp new file mode 100644 index 0000000000..0bf4aa1319 --- /dev/null +++ b/src/backends/neon/workloads/NeonPermuteWorkload.cpp @@ -0,0 +1,54 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPermuteWorkload.hpp" +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +#include <arm_compute/core/Error.h> + +namespace armnn +{ + +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const PermuteDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + const armnn::PermutationVector& mappings = descriptor.m_DimMappings; + + return arm_compute::NEPermute::validate(&aclInputInfo, &aclOutputInfo, + armcomputetensorutils::BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType... DataTypes> +NeonPermuteWorkload<DataTypes...>::NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, + const WorkloadInfo& info) + : TypedWorkload<PermuteQueueDescriptor, DataTypes...>(descriptor, info) +{ + using armcomputetensorutils::BuildArmComputePermutationVector; + + m_Data.ValidateInputsOutputs(GetName(), 1, 1); + + const arm_compute::ITensor& input = static_cast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = static_cast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; + + // Configure the permute function; it is run later, from Execute(). + m_PermuteFunction.configure(&input, &output, BuildArmComputePermutationVector(mappings)); +} + +template <armnn::DataType... DataTypes> +void NeonPermuteWorkload<DataTypes...>::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); + m_PermuteFunction.run(); +} + +template class NeonPermuteWorkload<DataType::Float16, DataType::Float32>; +template class NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.hpp b/src/backends/neon/workloads/NeonPermuteWorkload.hpp new file mode 100644 index 0000000000..a85816be38 --- /dev/null +++ b/src/backends/neon/workloads/NeonPermuteWorkload.hpp @@ -0,0 +1,43 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/Workload.hpp> +#include <backends/WorkloadData.hpp> +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +#include <armnn/TypesUtils.hpp> +#include <arm_compute/runtime/NEON/functions/NEPermute.h> + +#include <string> + +namespace armnn +{ +arm_compute::Status NeonPermuteWorkloadValidate(const TensorInfo& input, const TensorInfo& output, + const PermuteDescriptor& descriptor); + +template <armnn::DataType... DataTypes> +class NeonPermuteWorkload : public TypedWorkload<PermuteQueueDescriptor, DataTypes...> +{ +public: + static const std::string& GetName() + { + static const std::string name = std::string("NeonPermuteWorkload"); + return name; + } + + NeonPermuteWorkload(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info); + void Execute() const override; + +private: + using TypedWorkload<PermuteQueueDescriptor, DataTypes...>::m_Data; + mutable arm_compute::NEPermute m_PermuteFunction; +}; + +using NeonPermuteFloatWorkload = NeonPermuteWorkload<DataType::Float16, DataType::Float32>; +using NeonPermuteUint8Workload = NeonPermuteWorkload<DataType::QuantisedAsymm8>; + +} // namespace armnn diff --git a/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp new file mode 100644 index 0000000000..109e856506 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.cpp @@ -0,0 +1,47 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dBaseWorkload.hpp" +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ +using namespace armcomputetensorutils; + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor) +{ + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(descriptor); + + return arm_compute::NEPoolingLayer::validate(&aclInputInfo, &aclOutputInfo, layerInfo); +} + +template <armnn::DataType... 
dataTypes> +NeonPooling2dBaseWorkload<dataTypes...>::NeonPooling2dBaseWorkload( + const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, const std::string& name) + : TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>(descriptor, info) +{ + m_Data.ValidateInputsOutputs(name, 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters); + + m_PoolingLayer.configure(&input, &output, layerInfo); +} + +template class NeonPooling2dBaseWorkload<DataType::Float16, DataType::Float32>; +template class NeonPooling2dBaseWorkload<DataType::QuantisedAsymm8>; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp new file mode 100644 index 0000000000..8ea41fe18a --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dBaseWorkload.hpp @@ -0,0 +1,37 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonPooling2dWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const Pooling2dDescriptor& descriptor); + +// Base class template providing an implementation of the Pooling2d layer common to all data types. +template <armnn::DataType... dataTypes> +class NeonPooling2dBaseWorkload : public TypedWorkload<Pooling2dQueueDescriptor, dataTypes...> +{ +public: + using TypedWorkload<Pooling2dQueueDescriptor, dataTypes...>::m_Data; + + NeonPooling2dBaseWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info, + const std::string& name); + +protected: + mutable arm_compute::NEPoolingLayer m_PoolingLayer; +}; + + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp new file mode 100644 index 0000000000..46996b088c --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.cpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonPooling2dFloatWorkload::NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::Float16, armnn::DataType::Float32>(descriptor, info, + "NeonPooling2dFloatWorkload") +{ +} + +void NeonPooling2dFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dFloatWorkload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp new file mode 100644 index 0000000000..9b0eebdc2b --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dFloatWorkload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dFloatWorkload : public NeonPooling2dBaseWorkload<armnn::DataType::Float16, + armnn::DataType::Float32> +{ +public: + NeonPooling2dFloatWorkload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + diff --git a/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp b/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp new file mode 100644 index 0000000000..8f99a2be86 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dUint8Workload.cpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonPooling2dUint8Workload.hpp" + + + +namespace armnn +{ + +NeonPooling2dUint8Workload::NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, + const WorkloadInfo& info) + : NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8>(descriptor, info, "NeonPooling2dUint8Workload") +{ +} + +void NeonPooling2dUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dUint8Workload_Execute"); + m_PoolingLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp b/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp new file mode 100644 index 0000000000..d475c5f721 --- /dev/null +++ b/src/backends/neon/workloads/NeonPooling2dUint8Workload.hpp @@ -0,0 +1,25 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <armnn/Types.hpp> +#include "NeonPooling2dBaseWorkload.hpp" + +namespace armnn +{ + +class NeonPooling2dUint8Workload : public NeonPooling2dBaseWorkload<armnn::DataType::QuantisedAsymm8> +{ +public: + NeonPooling2dUint8Workload(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp b/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp new file mode 100644 index 0000000000..2dae9466bb --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeFloatWorkload.hpp" + + + +namespace armnn +{ + +NeonReshapeFloatWorkload::NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeFloatWorkload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeFloatWorkload_Execute"); + m_Layer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp b/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp new file mode 100644 index 0000000000..bdef862419 --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeFloatWorkload.hpp @@ -0,0 +1,29 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeFloatWorkload : public FloatWorkload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeFloatWorkload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + + diff --git a/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp b/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp new file mode 100644 index 0000000000..41aa07fe49 --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeUint8Workload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonReshapeUint8Workload.hpp" + + + + +namespace armnn +{ +NeonReshapeUint8Workload::NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, + const WorkloadInfo& info) + : Uint8Workload<ReshapeQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonReshapeUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_Layer.configure(&input, &output); +} + +void NeonReshapeUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeUint8Workload_Execute"); + m_Layer.run(); +} +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp b/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp new file mode 100644 index 0000000000..4951873f0b --- /dev/null +++ b/src/backends/neon/workloads/NeonReshapeUint8Workload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +class NeonReshapeUint8Workload : public Uint8Workload<ReshapeQueueDescriptor> +{ +public: + NeonReshapeUint8Workload(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEReshapeLayer m_Layer; +}; + +} //namespace armnn + + + + diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp new file mode 100644 index 0000000000..0e11d8249f --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.cpp @@ -0,0 +1,30 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxBaseWorkload.hpp" + +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor) +{ + // NOTE: We report 4D Softmax as unsupported until full support is added to ACL + if(input.GetShape().GetNumDimensions() >= 4u) + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, "4d softmax is not supported"); + } + + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NESoftmaxLayer::validate(&aclInputInfo, &aclOutputInfo, descriptor.m_Beta); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp new file mode 100644 index 0000000000..446392cd03 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxBaseWorkload.hpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSoftmaxWorkloadValidate(const TensorInfo& input, + const TensorInfo& output, + const SoftmaxDescriptor& descriptor); + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp new file mode 100644 index 0000000000..92e5139c1a --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.cpp @@ -0,0 +1,32 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxFloatWorkload.hpp" + +namespace armnn +{ + +NeonSoftmaxFloatWorkload::NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : FloatWorkload<SoftmaxQueueDescriptor>(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxFloatWorkload", 1, 1); + + // The ArmCompute softmax layer works on 2D input/output tensors; inputs with four or more dimensions + // are rejected by NeonSoftmaxWorkloadValidate.
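+ // The beta parameter scales the logits before normalisation: softmax(x)_i = exp(beta * x_i) / sum_j exp(beta * x_j), + // so m_Beta == 1.0f gives the standard softmax.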
+ arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_SoftmaxLayer.configure(&input, &output, m_Data.m_Parameters.m_Beta); +} + +void NeonSoftmaxFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxFloatWorkload_Execute"); + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp new file mode 100644 index 0000000000..83f29222eb --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxFloatWorkload.hpp @@ -0,0 +1,28 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +#include <memory> + +namespace armnn +{ + +class NeonSoftmaxFloatWorkload : public FloatWorkload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxFloatWorkload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp new file mode 100644 index 0000000000..cff869c9b7 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.cpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSoftmaxUint8Workload.hpp" + +namespace armnn +{ + +NeonSoftmaxUint8Workload::NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + : Uint8Workload<SoftmaxQueueDescriptor>(descriptor, info) + , m_SoftmaxLayer(memoryManager) +{ + m_Data.ValidateInputsOutputs("NeonSoftmaxUint8Workload", 1, 1); + + arm_compute::ITensor& input = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + const auto outputQuantization = output.info()->quantization_info(); + + if ((outputQuantization.scale != (1.0f / 256.0f)) || (outputQuantization.offset != 0)) + { + throw InvalidArgumentException( + "Invalid quantization for output. Only scale = 1.0f / 256.0f and offset = 0 supported"); + } + + m_SoftmaxLayer.configure(&input, &output, descriptor.m_Parameters.m_Beta); +} + +void NeonSoftmaxUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxUint8Workload_Execute"); + + m_SoftmaxLayer.run(); +} + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp new file mode 100644 index 0000000000..0d72514ec0 --- /dev/null +++ b/src/backends/neon/workloads/NeonSoftmaxUint8Workload.hpp @@ -0,0 +1,26 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> +#include <arm_compute/runtime/MemoryManagerOnDemand.h> + +namespace armnn +{ + +class NeonSoftmaxUint8Workload : public Uint8Workload<SoftmaxQueueDescriptor> +{ +public: + NeonSoftmaxUint8Workload(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + virtual void Execute() const override; + +private: + mutable arm_compute::NESoftmaxLayer m_SoftmaxLayer; +}; + +} //namespace armnn + diff --git a/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp b/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp new file mode 100644 index 0000000000..39ed5b7cbc --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterFloatWorkload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterFloatWorkload.hpp" + +namespace armnn +{ + +void NeonSplitterFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterFloatWorkload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp b/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp new file mode 100644 index 0000000000..744a4fe216 --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterFloatWorkload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterFloatWorkload : public NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32> +{ +public: + using NeonBaseSplitterWorkload<DataType::Float16, DataType::Float32>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp b/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp new file mode 100644 index 0000000000..4b2cf8fc91 --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterUint8Workload.cpp @@ -0,0 +1,17 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSplitterUint8Workload.hpp" + +namespace armnn +{ + +void NeonSplitterUint8Workload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterUint8Workload_Execute"); + NeonBaseSplitterWorkload::Execute(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp b/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp new file mode 100644 index 0000000000..f219cfaa7d --- /dev/null +++ b/src/backends/neon/workloads/NeonSplitterUint8Workload.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "NeonBaseSplitterWorkload.hpp" + +namespace armnn +{ + +class NeonSplitterUint8Workload : public NeonBaseSplitterWorkload<DataType::QuantisedAsymm8> +{ +public: + using NeonBaseSplitterWorkload<DataType::QuantisedAsymm8>::NeonBaseSplitterWorkload; + virtual void Execute() const override; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp new file mode 100644 index 0000000000..2acb829e3d --- /dev/null +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.cpp @@ -0,0 +1,46 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "NeonSubtractionFloatWorkload.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/CpuTensorHandle.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output) +{ + const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); + const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); + const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEArithmeticSubtraction::validate(&aclInput0, + &aclInput1, + &aclOutput, + arm_compute::ConvertPolicy::SATURATE); +} + +NeonSubtractionFloatWorkload::NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, + const WorkloadInfo& info) + : FloatWorkload<SubtractionQueueDescriptor>(descriptor, info) +{ + m_Data.ValidateInputsOutputs("NeonSubtractionFloatWorkload", 2, 1); + + arm_compute::ITensor& input1 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ITensor& input2 = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ITensor& output = boost::polymorphic_downcast<INeonTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); + + m_SubLayer.configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE); +} + +void NeonSubtractionFloatWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionFloatWorkload_Execute"); + m_SubLayer.run(); +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp new file mode 100644 index 0000000000..98aeb4cfc5 --- /dev/null +++ b/src/backends/neon/workloads/NeonSubtractionFloatWorkload.hpp @@ -0,0 +1,27 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <backends/neon/workloads/NeonWorkloadUtils.hpp> + +namespace armnn +{ + +arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0, + const TensorInfo& input1, + const TensorInfo& output); + +class NeonSubtractionFloatWorkload : public FloatWorkload<SubtractionQueueDescriptor> +{ +public: + NeonSubtractionFloatWorkload(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info); + virtual void Execute() const override; + +private: + mutable arm_compute::NEArithmeticSubtraction m_SubLayer; +}; + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.cpp b/src/backends/neon/workloads/NeonWorkloadUtils.cpp new file mode 100644 index 0000000000..195f090171 --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloadUtils.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// +#include "NeonWorkloadUtils.hpp" +#include <backends/aclCommon/ArmComputeTensorUtils.hpp> +#include <backends/aclCommon/ArmComputeUtils.hpp> +#include <backends/neon/NeonTensorHandle.hpp> +#include <backends/neon/NeonLayerSupport.hpp> +#include <backends/CpuTensorHandle.hpp> + +#include <armnn/Utils.hpp> +#include <armnn/Exceptions.hpp> + +#include <cstring> +#include <boost/assert.hpp> +#include <boost/cast.hpp> +#include <boost/format.hpp> + +#include "Profiling.hpp" + +#include <armnn/Types.hpp> +#include <Half.hpp> + +using namespace armnn::armcomputetensorutils; + +namespace armnn +{ + +// Allocates a tensor and copies the contents of data into it. +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data) +{ + InitialiseArmComputeTensorEmpty(tensor); + CopyArmComputeITensorData(data, tensor); +} + +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const Half* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const float* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const uint8_t* data); +template void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const int32_t* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, + const ConstCpuTensorHandle* handle) +{ + BOOST_ASSERT(handle); + switch(handle->GetTensorInfo().GetDataType()) + { + case DataType::Float16: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<Half>()); + break; + case DataType::Float32: + InitialiseArmComputeTensorData(tensor, handle->GetConstTensor<float>()); + break; + default: + BOOST_ASSERT_MSG(false, "Unexpected floating point type."); + } +} + +} //namespace armnn diff --git a/src/backends/neon/workloads/NeonWorkloadUtils.hpp b/src/backends/neon/workloads/NeonWorkloadUtils.hpp new file mode 100644 index 0000000000..22668f6f4b --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloadUtils.hpp @@ -0,0 +1,34 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include <backends/Workload.hpp> +#include <backends/neon/NeonTensorHandle.hpp> + +#include "NeonTimer.hpp" + +#include <arm_compute/core/Types.h> +#include <arm_compute/core/Helpers.h> +#include <arm_compute/runtime/NEON/NEFunctions.h> +#include <arm_compute/runtime/SubTensor.h> + +#include <boost/cast.hpp> + +namespace armnn +{ +class Layer; + +template<typename T> +void InitialiseArmComputeTensorData(arm_compute::Tensor& tensor, const T* data); + +void InitializeArmComputeTensorDataForFloatTypes(arm_compute::Tensor& tensor, const ConstCpuTensorHandle* handle); +} //namespace armnn + + +#define ARMNN_SCOPED_PROFILING_EVENT_NEON(name) \ + ARMNN_SCOPED_PROFILING_EVENT_WITH_INSTRUMENTS(armnn::Compute::CpuAcc, \ + name, \ + armnn::NeonTimer(), \ + armnn::WallClockTimer()) diff --git a/src/backends/neon/workloads/NeonWorkloads.hpp b/src/backends/neon/workloads/NeonWorkloads.hpp new file mode 100644 index 0000000000..a4ab6b2cac --- /dev/null +++ b/src/backends/neon/workloads/NeonWorkloads.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2017 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once +#include "NeonActivationFloatWorkload.hpp" +#include "NeonActivationUint8Workload.hpp" +#include "NeonAdditionFloatWorkload.hpp" +#include "NeonBaseConstantWorkload.hpp" +#include "NeonBaseMergerWorkload.hpp" +#include "NeonBaseSplitterWorkload.hpp" +#include "NeonBatchNormalizationFloatWorkload.hpp" +#include "NeonConstantFloatWorkload.hpp" +#include "NeonConstantUint8Workload.hpp" +#include "NeonConvertFp16ToFp32Workload.hpp" +#include "NeonConvertFp32ToFp16Workload.hpp" +#include "NeonConvolution2dBaseWorkload.hpp" +#include "NeonConvolution2dFloatWorkload.hpp" +#include "NeonConvolution2dUint8Workload.hpp" +#include "NeonDepthwiseConvolutionFloatWorkload.hpp" +#include "NeonDepthwiseConvolutionUint8Workload.hpp" +#include "NeonFloorFloatWorkload.hpp" +#include "NeonFullyConnectedWorkload.hpp" +#include "NeonL2NormalizationFloatWorkload.hpp" +#include "NeonLstmFloatWorkload.hpp" +#include "NeonMergerFloatWorkload.hpp" +#include "NeonMergerUint8Workload.hpp" +#include "NeonMultiplicationFloatWorkload.hpp" +#include "NeonNormalizationFloatWorkload.hpp" +#include "NeonPermuteWorkload.hpp" +#include "NeonPooling2dBaseWorkload.hpp" +#include "NeonPooling2dFloatWorkload.hpp" +#include "NeonPooling2dUint8Workload.hpp" +#include "NeonReshapeFloatWorkload.hpp" +#include "NeonReshapeUint8Workload.hpp" +#include "NeonSoftmaxFloatWorkload.hpp" +#include "NeonSoftmaxUint8Workload.hpp" +#include "NeonSplitterFloatWorkload.hpp" +#include "NeonSplitterUint8Workload.hpp" +#include "NeonSubtractionFloatWorkload.hpp"
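A usage sketch, not part of the patch: the NeonXxxWorkloadValidate helpers above are plain free functions, so a caller can probe Neon support for a given tensor configuration before constructing a workload, mirroring what NeonLayerSupport does. The shapes, data type, and include path below are illustrative assumptions.

#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <backends/neon/workloads/NeonSubtractionFloatWorkload.hpp>

#include <iostream>

int main()
{
    // Hypothetical 2x2 Float32 tensors; subtraction expects matching input and output shapes.
    armnn::TensorInfo input0({ 2, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo input1({ 2, 2 }, armnn::DataType::Float32);
    armnn::TensorInfo output({ 2, 2 }, armnn::DataType::Float32);

    // Ask the Arm Compute Library whether NEArithmeticSubtraction would accept this configuration.
    arm_compute::Status status = armnn::NeonSubtractionWorkloadValidate(input0, input1, output);

    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        std::cerr << "Subtraction unsupported: " << status.error_description() << std::endl;
        return 1;
    }

    std::cout << "Subtraction supported by the Neon backend" << std::endl;
    return 0;
}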