diff options
Diffstat (limited to 'src/armnn/backends/NeonLayerSupport.cpp')
-rw-r--r-- | src/armnn/backends/NeonLayerSupport.cpp | 242 |
1 files changed, 143 insertions, 99 deletions
diff --git a/src/armnn/backends/NeonLayerSupport.cpp b/src/armnn/backends/NeonLayerSupport.cpp index bfc84bd086..3aef4e60aa 100644 --- a/src/armnn/backends/NeonLayerSupport.cpp +++ b/src/armnn/backends/NeonLayerSupport.cpp @@ -15,34 +15,29 @@ #include <boost/core/ignore_unused.hpp> #ifdef ARMCOMPUTENEON_ENABLED +#include "NeonWorkloads/NeonAdditionFloat32Workload.hpp" +#include "NeonWorkloads/NeonActivationFloat32Workload.hpp" +#include "NeonWorkloads/NeonBatchNormalizationFloat32Workload.hpp" #include "NeonWorkloads/NeonConvolution2dBaseWorkload.hpp" -#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "NeonWorkloads/NeonDepthwiseConvolutionBaseWorkload.hpp" +#include "NeonWorkloads/NeonL2NormalizationFloat32Workload.hpp" +#include "NeonWorkloads/NeonMultiplicationFloat32Workload.hpp" +#include "NeonWorkloads/NeonNormalizationFloat32Workload.hpp" +#include "NeonWorkloads/NeonFullyConnectedFloat32Workload.hpp" #include "NeonWorkloads/NeonPermuteWorkload.hpp" +#include "NeonWorkloads/NeonPooling2dBaseWorkload.hpp" +#include "NeonWorkloads/NeonSoftmaxBaseWorkload.hpp" #endif using namespace boost; namespace armnn { -bool IsNeonActivationUint8Supported(std::string* reasonIfUnsupported, const ActivationDescriptor& parameters) -{ - if (parameters.m_Function != ActivationFunction::BoundedReLu) - { - if (reasonIfUnsupported) - { - *reasonIfUnsupported = "Unsupported activation function, only BoundedReLu is supported)"; - } - - return false; - } - - return true; -} bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convolution2dDescriptor& desc) { // See arm_compute::NEDirectConvolutionLayer documentation for the supported cases, - // and complement with NEDirectConvolutionLayerKernel::configure() implementation + // and complement with NEDirectConvolutionLayerKernel::configure() implementation. // Only 1x1 is using direct convolution. Performance results and details are in: // https://jira.arm.com/browse/IVGCVSW-1003 @@ -60,15 +55,15 @@ bool IsNeonDirectConvolutionPreferred(const TensorInfo& weightInfo, const Convol conv2ddesc.m_PadTop > value || conv2ddesc.m_PadBottom > value; }; - // Supported sizes and padding + // Supported sizes and padding. const bool sizeAndPaddingSupported = - // Pad > 0 not supported for 1x1 weights + // Pad > 0 not supported for 1x1 weights. (weightInfo.GetShape()[2] == 1 && weightInfo.GetShape()[3] == 1 && !paddingLargerThan(desc, 0u)); const bool preferDirectConvolution = dataTypeSupported && strideSupported && sizeAndPaddingSupported && - // NEDirectConvolutionLayerKernel doesn't support NULL bias + // NEDirectConvolutionLayerKernel doesn't support NULL bias. desc.m_BiasEnabled; return preferDirectConvolution; } @@ -108,10 +103,10 @@ bool IsNeonBackendSupported(std::string* reasonIfUnsupported) #endif } -template<typename Float32Func, typename Uint8Func, typename ... Params> +template<typename FloatFunc, typename Uint8Func, typename ... Params> bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, DataType dataType, - Float32Func floatFuncPtr, + FloatFunc floatFuncPtr, Uint8Func uint8FuncPtr, Params&&... params) { @@ -119,6 +114,7 @@ bool IsSupportedForDataTypeNeon(std::string* reasonIfUnsupported, IsSupportedForDataTypeGeneric(reasonIfUnsupported, dataType, floatFuncPtr, + floatFuncPtr, uint8FuncPtr, std::forward<Params>(params)...); } @@ -144,43 +140,16 @@ inline bool IsWorkloadSupported(FuncType& func, std::string* reasonIfUnsupported #endif bool IsActivationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, const ActivationDescriptor& descriptor, std::string* reasonIfUnsupported) { ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<const ActivationDescriptor&>, - &IsNeonActivationUint8Supported, - descriptor); -} - -bool IsNeonDepthwiseConvolution2dDescParamsSupported(std::string* reasonIfUnsupported, - const DepthwiseConvolution2dDescriptor& parameters, - const TensorInfo& weights) -{ - ignore_unused(weights); - - if (parameters.m_StrideX < 1 || parameters.m_StrideX > 3) - { - if (reasonIfUnsupported) - { - *reasonIfUnsupported = "m_StrideX can only be 1, 2 or 3"; - } - return false; - } - - // weights.GetShape()[0] = channel multiplier - if (weights.GetShape()[0] != 1) - { - if (reasonIfUnsupported) - { - *reasonIfUnsupported = "Channel multiplier only supports the value 1 in the NEON backend"; - } - return false; - } - - return true; + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonActivationWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor); } bool IsAdditionSupportedNeon(const TensorInfo& input0, @@ -188,23 +157,31 @@ bool IsAdditionSupportedNeon(const TensorInfo& input0, const TensorInfo& output, std::string* reasonIfUnsupported) { - ignore_unused(input1); - ignore_unused(output); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); } bool IsBatchNormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& mean, + const TensorInfo& var, + const TensorInfo& beta, + const TensorInfo& gamma, const BatchNormalizationDescriptor& descriptor, std::string* reasonIfUnsupported) { - ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonBatchNormalizationValidate, + reasonIfUnsupported, + input, + output, + mean, + var, + beta, + gamma, + descriptor); } bool IsConstantSupportedNeon(const TensorInfo& output, @@ -233,27 +210,40 @@ bool IsConvolution2dSupportedNeon(const TensorInfo& input, } bool IsDepthwiseConvolutionSupportedNeon(const TensorInfo& input, + const TensorInfo& output, const DepthwiseConvolution2dDescriptor& descriptor, const TensorInfo& weights, + const TensorInfo& biases, std::string* reasonIfUnsupported) { - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &IsNeonDepthwiseConvolution2dDescParamsSupported, - &IsNeonDepthwiseConvolution2dDescParamsSupported, - descriptor, - weights); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDepthwiseConvolutionWorkloadValidate, + reasonIfUnsupported, + input, + output, + descriptor, + weights, + biases); } bool IsFullyConnectedSupportedNeon(const TensorInfo& input, + const TensorInfo& output, + const TensorInfo& weights, + const TensorInfo& biases, const FullyConnectedDescriptor& descriptor, std::string* reasonIfUnsupported) { - ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); + // At the moment U8 is unsupported + if (input.GetDataType() == DataType::QuantisedAsymm8) + { + return false; + } + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonFullyConnectedWorkloadValidate, + reasonIfUnsupported, + input, + output, + weights, + biases, + descriptor); } bool IsInputSupportedNeon(const TensorInfo& input, @@ -266,12 +256,10 @@ bool IsInputSupportedNeon(const TensorInfo& input, } bool IsL2NormalizationSupportedNeon(const TensorInfo& input, + const TensorInfo& output, std::string* reasonIfUnsupported) { - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFunc<>); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonL2NormalizationWorkloadValidate, reasonIfUnsupported, input, output); } bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, @@ -287,13 +275,14 @@ bool IsMergerSupportedNeon(const std::vector<const TensorInfo*> inputs, bool IsMultiplicationSupportedNeon(const TensorInfo& input0, const TensorInfo& input1, + const TensorInfo& output, std::string* reasonIfUnsupported) { - ignore_unused(input1); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input0.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, + reasonIfUnsupported, + input0, + input1, + output); } bool IsNormalizationSupportedNeon(const TensorInfo& input, @@ -301,11 +290,7 @@ bool IsNormalizationSupportedNeon(const TensorInfo& input, const NormalizationDescriptor& descriptor, std::string* reasonIfUnsupported) { - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &IsNeonNormalizationDescParamsSupported, - &FalseFuncU8<const NormalizationDescriptor&>, - descriptor); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonNormalizationWorkloadValidate, reasonIfUnsupported, input, output, descriptor); } bool IsOutputSupportedNeon(const TensorInfo& output, @@ -341,14 +326,11 @@ bool IsResizeBilinearSupportedNeon(const TensorInfo& input, } bool IsSoftmaxSupportedNeon(const TensorInfo& input, + const TensorInfo& output, const SoftmaxDescriptor& descriptor, std::string* reasonIfUnsupported) { - ignore_unused(descriptor); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &TrueFunc<>); + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSoftmaxWorkloadValidate, reasonIfUnsupported, input, output, descriptor); } bool IsSplitterSupportedNeon(const TensorInfo& input, @@ -385,10 +367,72 @@ bool IsFloorSupportedNeon(const TensorInfo& input, std::string* reasonIfUnsupported) { ignore_unused(output); - return IsSupportedForDataTypeNeon(reasonIfUnsupported, - input.GetDataType(), - &TrueFunc<>, - &FalseFuncU8<>); + return IsNeonBackendSupported(reasonIfUnsupported) && + IsSupportedForDataTypeGeneric(reasonIfUnsupported, + input.GetDataType(), + &FalseFuncF16<>, + &TrueFunc<>, + &FalseFuncU8<>); +} + +bool IsLstmSupportedNeon(const TensorInfo& input, const TensorInfo& outputStateIn, + const TensorInfo& cellStateIn, const TensorInfo& scratchBuffer, + const TensorInfo& outputStateOut, const TensorInfo& cellStateOut, + const TensorInfo& output, const LstmDescriptor& descriptor, + const TensorInfo& inputToForgetWeights, const TensorInfo& inputToCellWeights, + const TensorInfo& inputToOutputWeights, const TensorInfo& recurrentToForgetWeights, + const TensorInfo& recurrentToCellWeights, const TensorInfo& recurrentToOutputWeights, + const TensorInfo& forgetGateBias, const TensorInfo& cellBias, + const TensorInfo& outputGateBias, const TensorInfo* inputToInputWeights, + const TensorInfo* recurrentToInputWeights, const TensorInfo* cellToInputWeights, + const TensorInfo* inputGateBias, const TensorInfo* projectionWeights, + const TensorInfo* projectionBias, const TensorInfo* cellToForgetWeights, + const TensorInfo* cellToOutputWeights, std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(outputStateIn); + ignore_unused(cellStateIn); + ignore_unused(scratchBuffer); + ignore_unused(outputStateOut); + ignore_unused(cellStateOut); + ignore_unused(output); + ignore_unused(descriptor); + ignore_unused(inputToForgetWeights); + ignore_unused(inputToCellWeights); + ignore_unused(inputToOutputWeights); + ignore_unused(recurrentToForgetWeights); + ignore_unused(recurrentToCellWeights); + ignore_unused(recurrentToOutputWeights); + ignore_unused(forgetGateBias); + ignore_unused(cellBias); + ignore_unused(outputGateBias); + ignore_unused(inputToInputWeights); + ignore_unused(recurrentToInputWeights); + ignore_unused(cellToInputWeights); + ignore_unused(inputGateBias); + ignore_unused(projectionWeights); + ignore_unused(projectionBias); + ignore_unused(cellToForgetWeights); + ignore_unused(cellToOutputWeights); + return false; +} + +bool IsConvertFp16ToFp32SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; +} + +bool IsConvertFp32ToFp16SupportedNeon(const TensorInfo& input, + const TensorInfo& output, + std::string* reasonIfUnsupported) +{ + ignore_unused(input); + ignore_unused(output); + return true; } } |