//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonLayerSupport.hpp"
#include "NeonBackendId.hpp"

#include <armnn/Descriptors.hpp>
#include <InternalTypes.hpp>
#include <LayerSupportCommon.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/core/ignore_unused.hpp>

#ifdef ARMCOMPUTENEON_ENABLED
#include "workloads/NeonAdditionFloatWorkload.hpp"
#include "workloads/NeonActivationWorkload.hpp"
#include "workloads/NeonBatchNormalizationFloatWorkload.hpp"
#include "workloads/NeonConvolution2dWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonL2NormalizationFloatWorkload.hpp"
#include "workloads/NeonMultiplicationFloatWorkload.hpp"
#include "workloads/NeonNormalizationFloatWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonPermuteWorkload.hpp"
#include "workloads/NeonPooling2dWorkload.hpp"
#include "workloads/NeonSoftmaxBaseWorkload.hpp"
#include "workloads/NeonSubtractionFloatWorkload.hpp"
#endif

using namespace boost;

namespace armnn
{

namespace
{

ILayerSupportSharedPtr GetLayerSupportPointer()
{
    static ILayerSupportSharedPtr instance{new NeonLayerSupport};
    return instance;
}

static StaticRegistryInitializer<LayerSupportRegistry> g_RegisterHelper{
    LayerSupportRegistryInstance(),
    NeonBackendId(),
    []()
    {
        return GetLayerSupportPointer();
    }
};

bool IsNeonBackendSupported(Optional<std::string&> reasonIfUnsupported)
{
#if ARMCOMPUTENEON_ENABLED
    return true;
#else
    if (reasonIfUnsupported)
    {
        reasonIfUnsupported.value() = "The armnn library has been built without NEON support";
    }
    return false;
#endif
}

template<typename FloatFunc, typename Uint8Func, typename ... Params>
bool IsSupportedForDataTypeNeon(Optional<std::string&> reasonIfUnsupported,
                                DataType dataType,
                                FloatFunc floatFuncPtr,
                                Uint8Func uint8FuncPtr,
                                Params&&... params)
{
    return IsNeonBackendSupported(reasonIfUnsupported) &&
        IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                      dataType,
                                      floatFuncPtr,
                                      floatFuncPtr,
                                      uint8FuncPtr,
                                      std::forward<Params>(params)...);
}

#if ARMCOMPUTENEON_ENABLED
template<typename FuncType, typename... Args>
inline bool IsWorkloadSupported(FuncType& func, Optional<std::string&> reasonIfUnsupported, Args&&... args)
{
    arm_compute::Status aclStatus = func(std::forward<Args>(args)...);
    const bool supported = (aclStatus.error_code() == arm_compute::ErrorCode::OK);
    if (!supported && reasonIfUnsupported)
    {
        reasonIfUnsupported.value() = aclStatus.error_description();
    }
    return supported;
}

#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsWorkloadSupported(func, reasonIfUnsupported, __VA_ARGS__);
#else
#define FORWARD_WORKLOAD_VALIDATE_FUNC(func, reasonIfUnsupported, ...) \
    return IsNeonBackendSupported(reasonIfUnsupported);
#endif

} // anonymous namespace
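
// Each Is<Layer>Supported query below either forwards to the matching
// arm_compute validate function through FORWARD_WORKLOAD_VALIDATE_FUNC (when
// ARMCOMPUTENEON_ENABLED is defined), or fails with the "built without NEON
// support" reason. A typical call site (hypothetical variable names) might
// look like:
//
//   std::string reason;
//   if (!layerSupport.IsActivationSupported(inputInfo, outputInfo, activationDesc,
//                                           Optional<std::string&>(reason)))
//   {
//       // Fall back to another backend; 'reason' explains the rejection.
//   }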

bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input,
                                             const TensorInfo& output,
                                             const ActivationDescriptor& descriptor,
                                             Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(descriptor);
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}

bool NeonLayerSupport::IsAdditionSupported(const TensorInfo& input0,
                                           const TensorInfo& input1,
                                           const TensorInfo& output,
                                           Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}

bool NeonLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input,
                                                     const TensorInfo& output,
                                                     const TensorInfo& mean,
                                                     const TensorInfo& var,
                                                     const TensorInfo& beta,
                                                     const TensorInfo& gamma,
                                                     const BatchNormalizationDescriptor& descriptor,
                                                     Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   mean,
                                   var,
                                   beta,
                                   gamma,
                                   descriptor);
}

bool NeonLayerSupport::IsConstantSupported(const TensorInfo& output,
                                           Optional<std::string&> reasonIfUnsupported) const
{
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      output.GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsConvertFp16ToFp32Supported(const TensorInfo& input,
                                                    const TensorInfo& output,
                                                    Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(input);
    ignore_unused(output);
    ignore_unused(reasonIfUnsupported);
    return true;
}

bool NeonLayerSupport::IsConvertFp32ToFp16Supported(const TensorInfo& input,
                                                    const TensorInfo& output,
                                                    Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(input);
    ignore_unused(output);
    ignore_unused(reasonIfUnsupported);
    return true;
}

bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
                                                const TensorInfo& output,
                                                const Convolution2dDescriptor& descriptor,
                                                const TensorInfo& weights,
                                                const Optional<TensorInfo>& biases,
                                                Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonConvolution2dWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor,
                                   weights,
                                   biases);
}

bool NeonLayerSupport::IsDepthwiseConvolutionSupported(const TensorInfo& input,
                                                       const TensorInfo& output,
                                                       const DepthwiseConvolution2dDescriptor& descriptor,
                                                       const TensorInfo& weights,
                                                       const Optional<TensorInfo>& biases,
                                                       Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor,
                                   weights,
                                   biases);
}

bool NeonLayerSupport::IsDivisionSupported(const TensorInfo& input0,
                                           const TensorInfo& input1,
                                           const TensorInfo& output,
                                           Optional<std::string&> reasonIfUnsupported) const
{
    // Division is not supported by this backend.
    ignore_unused(input0);
    ignore_unused(input1);
    ignore_unused(output);
    ignore_unused(reasonIfUnsupported);
    return false;
}

bool NeonLayerSupport::IsFakeQuantizationSupported(const TensorInfo& input,
                                                   const FakeQuantizationDescriptor& descriptor,
                                                   Optional<std::string&> reasonIfUnsupported) const
{
    // FakeQuantization is not supported by this backend.
    ignore_unused(input);
    ignore_unused(descriptor);
    ignore_unused(reasonIfUnsupported);
    return false;
}
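
// Floor has a NEON implementation for Float32 only: the per-data-type
// selectors below reject Float16 and quantised 8-bit inputs and accept Float32.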
bool NeonLayerSupport::IsFloorSupported(const TensorInfo& input,
                                        const TensorInfo& output,
                                        Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(output);
    return IsNeonBackendSupported(reasonIfUnsupported) &&
        IsSupportedForDataTypeGeneric(reasonIfUnsupported,
                                      input.GetDataType(),
                                      &FalseFuncF16<>,
                                      &TrueFunc<>,
                                      &FalseFuncU8<>);
}

bool NeonLayerSupport::IsFullyConnectedSupported(const TensorInfo& input,
                                                 const TensorInfo& output,
                                                 const TensorInfo& weights,
                                                 const TensorInfo& biases,
                                                 const FullyConnectedDescriptor& descriptor,
                                                 Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   weights,
                                   biases,
                                   descriptor);
}

bool NeonLayerSupport::IsInputSupported(const TensorInfo& input,
                                        Optional<std::string&> reasonIfUnsupported) const
{
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      input.GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsL2NormalizationSupported(const TensorInfo& input,
                                                  const TensorInfo& output,
                                                  const L2NormalizationDescriptor& descriptor,
                                                  Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}
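
// LSTM has no NEON workload in this version of the backend, so the query
// unconditionally returns false; the parameters are consumed only to silence
// unused-argument warnings.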
bool NeonLayerSupport::IsLstmSupported(const TensorInfo& input,
                                       const TensorInfo& outputStateIn,
                                       const TensorInfo& cellStateIn,
                                       const TensorInfo& scratchBuffer,
                                       const TensorInfo& outputStateOut,
                                       const TensorInfo& cellStateOut,
                                       const TensorInfo& output,
                                       const LstmDescriptor& descriptor,
                                       const TensorInfo& inputToForgetWeights,
                                       const TensorInfo& inputToCellWeights,
                                       const TensorInfo& inputToOutputWeights,
                                       const TensorInfo& recurrentToForgetWeights,
                                       const TensorInfo& recurrentToCellWeights,
                                       const TensorInfo& recurrentToOutputWeights,
                                       const TensorInfo& forgetGateBias,
                                       const TensorInfo& cellBias,
                                       const TensorInfo& outputGateBias,
                                       const TensorInfo* inputToInputWeights,
                                       const TensorInfo* recurrentToInputWeights,
                                       const TensorInfo* cellToInputWeights,
                                       const TensorInfo* inputGateBias,
                                       const TensorInfo* projectionWeights,
                                       const TensorInfo* projectionBias,
                                       const TensorInfo* cellToForgetWeights,
                                       const TensorInfo* cellToOutputWeights,
                                       Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(input);
    ignore_unused(outputStateIn);
    ignore_unused(cellStateIn);
    ignore_unused(scratchBuffer);
    ignore_unused(outputStateOut);
    ignore_unused(cellStateOut);
    ignore_unused(output);
    ignore_unused(descriptor);
    ignore_unused(inputToForgetWeights);
    ignore_unused(inputToCellWeights);
    ignore_unused(inputToOutputWeights);
    ignore_unused(recurrentToForgetWeights);
    ignore_unused(recurrentToCellWeights);
    ignore_unused(recurrentToOutputWeights);
    ignore_unused(forgetGateBias);
    ignore_unused(cellBias);
    ignore_unused(outputGateBias);
    ignore_unused(inputToInputWeights);
    ignore_unused(recurrentToInputWeights);
    ignore_unused(cellToInputWeights);
    ignore_unused(inputGateBias);
    ignore_unused(projectionWeights);
    ignore_unused(projectionBias);
    ignore_unused(cellToForgetWeights);
    ignore_unused(cellToOutputWeights);
    ignore_unused(reasonIfUnsupported);
    return false;
}

bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
                                       const TensorInfo& output,
                                       const MeanDescriptor& descriptor,
                                       Optional<std::string&> reasonIfUnsupported) const
{
    // Mean is not supported by this backend.
    ignore_unused(input);
    ignore_unused(output);
    ignore_unused(descriptor);
    ignore_unused(reasonIfUnsupported);
    return false;
}

bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
                                         const OriginsDescriptor& descriptor,
                                         Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(descriptor);
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      inputs[0]->GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
                                                 const TensorInfo& input1,
                                                 const TensorInfo& output,
                                                 Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}

bool NeonLayerSupport::IsNormalizationSupported(const TensorInfo& input,
                                                const TensorInfo& output,
                                                const NormalizationDescriptor& descriptor,
                                                Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}

bool NeonLayerSupport::IsOutputSupported(const TensorInfo& output,
                                         Optional<std::string&> reasonIfUnsupported) const
{
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      output.GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsPadSupported(const TensorInfo& input,
                                      const TensorInfo& output,
                                      const PadDescriptor& descriptor,
                                      Optional<std::string&> reasonIfUnsupported) const
{
    // Pad is not supported by this backend.
    ignore_unused(input);
    ignore_unused(output);
    ignore_unused(descriptor);
    ignore_unused(reasonIfUnsupported);
    return false;
}

bool NeonLayerSupport::IsPermuteSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const PermuteDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPermuteWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}

bool NeonLayerSupport::IsPooling2dSupported(const TensorInfo& input,
                                            const TensorInfo& output,
                                            const Pooling2dDescriptor& descriptor,
                                            Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonPooling2dWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}

bool NeonLayerSupport::IsReshapeSupported(const TensorInfo& input,
                                          Optional<std::string&> reasonIfUnsupported) const
{
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      input.GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsResizeBilinearSupported(const TensorInfo& input,
                                                 Optional<std::string&> reasonIfUnsupported) const
{
    // ResizeBilinear is not supported by this backend.
    ignore_unused(input);
    ignore_unused(reasonIfUnsupported);
    return false;
}

bool NeonLayerSupport::IsSoftmaxSupported(const TensorInfo& input,
                                          const TensorInfo& output,
                                          const SoftmaxDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate,
                                   reasonIfUnsupported,
                                   input,
                                   output,
                                   descriptor);
}

bool NeonLayerSupport::IsSplitterSupported(const TensorInfo& input,
                                           const ViewsDescriptor& descriptor,
                                           Optional<std::string&> reasonIfUnsupported) const
{
    ignore_unused(descriptor);
    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
                                      input.GetDataType(),
                                      &TrueFunc<>,
                                      &TrueFunc<>);
}

bool NeonLayerSupport::IsSubtractionSupported(const TensorInfo& input0,
                                              const TensorInfo& input1,
                                              const TensorInfo& output,
                                              Optional<std::string&> reasonIfUnsupported) const
{
    FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
                                   reasonIfUnsupported,
                                   input0,
                                   input1,
                                   output);
}
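
// Heuristic for preferring arm_compute's direct convolution over the default
// GEMM-based path. Note this is a free function rather than part of
// ILayerSupport; presumably the NEON convolution workload consults it when
// selecting a convolution method.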
bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc)
{
    // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases,
    // and complement with NEDirectConvolutionLayerKernel::configure() implementation.

    // Only 1x1 is using direct convolution. Performance results and details are in:
    //    https://jira.arm.com/browse/IVGCVSW-1003
    // Measurements were taken as of clframework: f105ab972135bcd21304883eff040d7e587099bc

    const bool dataTypeSupported = (weightInfo.GetDataType() == armnn::DataType::Float32);

    // Strides: 1|2|3
    const bool strideSupported = (desc.m_StrideX == 1 || desc.m_StrideX == 2 || desc.m_StrideX == 3) &&
                                 (desc.m_StrideY == 1 || desc.m_StrideY == 2 || desc.m_StrideY == 3);

    auto paddingLargerThan = [](const Convolution2dDescriptor& conv2ddesc, unsigned int value)
    {
        return conv2ddesc.m_PadLeft > value || conv2ddesc.m_PadRight > value ||
               conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value;
    };

    // Supported sizes and padding.
    const bool sizeAndPaddingSupported =
        // Pad > 0 not supported for 1x1 weights.
        (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u));

    const bool preferDirectConvolution = dataTypeSupported &&
                                         strideSupported &&
                                         sizeAndPaddingSupported &&
                                         // NEDirectConvolutionLayerKernel doesn't support NULL bias.
                                         desc.m_BiasEnabled;
    return preferDirectConvolution;
}

} // namespace armnn