From 07810fc2fcdd34db74222d90cc73ef12a88e7b78 Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Thu, 12 Nov 2020 10:58:48 +0000
Subject: IVGCVSW-5328-5329 Fuse Activation

* Added Fused Activation Optimization to both CL and Neon backends.
* Added Fused Activation support to all the CL and Neon workloads
  that support it.
* Changed ProfilingTest network to be a Convolution layer followed
  by an Abs layer rather than an Activation layer.
* Added IBackendInternal::OptimizeSubgraphView function that can
  accept a ModelOptions.
* Network will now call OptimizeSubgraphView passing in the
  ModelOptions.

Signed-off-by: Keith Davis
Signed-off-by: Mike Kelly
Signed-off-by: Teresa Charlin
Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d
---
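Note (annotation, not part of the commit): every validate function touched
below gains an optional activationDescriptor parameter defaulting to
nullptr, so existing callers keep the unfused behaviour. A minimal,
hypothetical sketch of how a caller might exercise the new overload
(tensor shapes and activation values are illustrative only, not taken
from this patch):

    // Describe the activation to be fused into the addition kernel.
    armnn::ActivationDescriptor activation;
    activation.m_Function = armnn::ActivationFunction::BoundedReLu;
    activation.m_A = 6.0f; // upper bound for BoundedReLu

    armnn::TensorInfo info(armnn::TensorShape({2, 2}),
                           armnn::DataType::Float32);

    // Fused: the addition and the activation are validated as a
    // single Neon kernel.
    arm_compute::Status fused =
        armnn::NeonAdditionWorkloadValidate(info, info, info, &activation);

    // Unfused: the default argument preserves the old behaviour.
    arm_compute::Status unfused =
        armnn::NeonAdditionWorkloadValidate(info, info, info);
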
 .../neon/workloads/NeonAdditionWorkload.cpp        | 15 ++++++++++---
 .../neon/workloads/NeonAdditionWorkload.hpp        |  4 +++-
 .../workloads/NeonBatchNormalizationWorkload.cpp   | 17 ++++++++++++---
 .../workloads/NeonBatchNormalizationWorkload.hpp   |  3 ++-
 .../neon/workloads/NeonConvolution2dWorkload.cpp   | 15 +++++++++----
 .../neon/workloads/NeonConvolution2dWorkload.hpp   |  3 ++-
 .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 25 ++++++++++++++--------
 .../workloads/NeonDepthwiseConvolutionWorkload.hpp |  4 +++-
 .../neon/workloads/NeonDivisionWorkload.cpp        | 20 ++++++++++++-----
 .../neon/workloads/NeonDivisionWorkload.hpp        |  5 +++--
 .../neon/workloads/NeonFullyConnectedWorkload.cpp  | 16 ++++++++------
 .../neon/workloads/NeonFullyConnectedWorkload.hpp  |  3 ++-
 .../neon/workloads/NeonMultiplicationWorkload.cpp  | 16 +++++++++++---
 .../neon/workloads/NeonMultiplicationWorkload.hpp  |  4 +++-
 .../neon/workloads/NeonSubtractionWorkload.cpp     | 17 ++++++++++++---
 .../neon/workloads/NeonSubtractionWorkload.hpp     |  4 +++-
 16 files changed, 126 insertions(+), 45 deletions(-)

diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index cb0c8a471f..9300b317a9 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -7,6 +7,8 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticAddition.h>
@@ -17,16 +19,21 @@ namespace armnn
 
 arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
                                                  const TensorInfo& input1,
-                                                 const TensorInfo& output)
+                                                 const TensorInfo& output,
+                                                 const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEArithmeticAddition::validate(&aclInput0,
                                                        &aclInput1,
                                                        &aclOutput,
-                                                       arm_compute::ConvertPolicy::SATURATE);
+                                                       arm_compute::ConvertPolicy::SATURATE,
+                                                       activationInfo);
 }
 
 
@@ -40,8 +47,10 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri
     arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     auto layer = std::make_unique<arm_compute::NEArithmeticAddition>();
-    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
     m_AddLayer.reset(layer.release());
 }
 
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.hpp b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
index 826fb1f3dd..8e43cbdb6d 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.hpp
@@ -8,6 +8,7 @@
 #include <backendsCommon/Workload.hpp>
 
 #include <arm_compute/core/Error.h>
+#include <armnn/Descriptors.hpp>
 #include <arm_compute/runtime/IFunction.h>
 
 namespace armnn
@@ -15,7 +16,8 @@ namespace armnn
 
 arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo& input0,
                                                  const TensorInfo& input1,
-                                                 const TensorInfo& output);
+                                                 const TensorInfo& output,
+                                                 const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonAdditionWorkload : public BaseWorkload<AdditionQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index ff777dbf9b..33480faf69 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -8,7 +8,10 @@
 #include "NeonWorkloadUtils.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEBatchNormalizationLayer.h>
@@ -24,7 +27,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
                                                    const TensorInfo& var,
                                                    const TensorInfo& beta,
                                                    const TensorInfo& gamma,
-                                                   const BatchNormalizationDescriptor& descriptor)
+                                                   const BatchNormalizationDescriptor& descriptor,
+                                                   const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInputInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
@@ -39,13 +43,17 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
     const arm_compute::TensorInfo aclGammaInfo =
           armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEBatchNormalizationLayer::validate(&aclInputInfo,
                                                             &aclOutputInfo,
                                                             &aclMeanInfo,
                                                             &aclVarInfo,
                                                             &aclBetaInfo,
                                                             &aclGammaInfo,
-                                                            descriptor.m_Eps);
+                                                            descriptor.m_Eps,
+                                                            activationInfo);
 }
 
 NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
@@ -73,6 +81,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
     m_Beta = std::make_unique<arm_compute::Tensor>();
     BuildArmComputeTensor(*m_Beta, m_Data.m_Beta->GetTensorInfo());
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     auto layer = std::make_unique<arm_compute::NEBatchNormalizationLayer>();
     layer->configure(&input,
                      &output,
@@ -80,7 +90,8 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
                      m_Variance.get(),
                      m_Beta.get(),
                      m_Gamma.get(),
-                     m_Data.m_Parameters.m_Eps);
+                     m_Data.m_Parameters.m_Eps,
+                     activationInfo);
     m_Layer.reset(layer.release());
 
     InitializeArmComputeTensorData(*m_Mean, m_Data.m_Mean);
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
index 3619ea0d73..fea778fb1c 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo& input,
                                                    const TensorInfo& var,
                                                    const TensorInfo& beta,
                                                    const TensorInfo& gamma,
-                                                   const BatchNormalizationDescriptor& descriptor);
+                                                   const BatchNormalizationDescriptor& descriptor,
+                                                   const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index af6f1aee78..fd8be17dfd 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -6,6 +6,7 @@
 #include "NeonConvolution2dWorkload.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
 #include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/CpuTensorHandle.hpp>
 #include <neon/workloads/NeonWorkloadUtils.hpp>
@@ -25,7 +26,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
                                                       const Convolution2dDescriptor& descriptor,
                                                       const TensorInfo& weights,
                                                       const Optional<TensorInfo>& biases,
-                                                      bool isFastMathEnabled)
+                                                      bool isFastMathEnabled,
+                                                      const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -47,6 +49,9 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
 
     arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEConvolutionLayer::validate(&aclInputInfo,
                                                      &aclWeightsInfo,
                                                      optionalAclBiasesInfo,
@@ -54,7 +59,7 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
                                                      layerInfo,
                                                      arm_compute::WeightsInfo(),
                                                      aclDilationInfo,
-                                                     arm_compute::ActivationLayerInfo(),
+                                                     activationInfo,
                                                      isFastMathEnabled);
 }
 
@@ -92,6 +97,8 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(m_Data.m_Parameters.m_DilationX,
                                                                       m_Data.m_Parameters.m_DilationY);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     auto convolutionLayer = std::make_unique<arm_compute::NEConvolutionLayer>(memoryManager);
     convolutionLayer->configure(&input,
                                 m_KernelTensor.get(),
@@ -100,7 +107,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
                                 padStrideInfo,
                                 arm_compute::WeightsInfo(),
                                 aclDilationInfo,
-                                arm_compute::ActivationLayerInfo(),
+                                activationInfo,
                                 isFastMathEnabled);
 
     m_ConvolutionMethod =
@@ -110,7 +117,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
                                 padStrideInfo,
                                 arm_compute::WeightsInfo(),
                                 aclDilationInfo,
-                                arm_compute::ActivationLayerInfo(),
+                                activationInfo,
                                 isFastMathEnabled);
 
     m_ConvolutionLayer.reset(convolutionLayer.release());
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 860d78ba7e..4b6e58ce41 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
                                                       const Convolution2dDescriptor& descriptor,
                                                       const TensorInfo& weights,
                                                       const Optional<TensorInfo>& biases,
-                                                      bool isFastMathEnabled = false);
+                                                      bool isFastMathEnabled = false,
+                                                      const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index a9a3c75bfd..db6bcc3ecb 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -10,6 +10,7 @@
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 
 #include <armnn/utility/NumericCast.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
@@ -29,7 +30,8 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
                                                              const TensorInfo& output,
                                                              const DepthwiseConvolution2dDescriptor& descriptor,
                                                              const TensorInfo& weights,
-                                                             const Optional<TensorInfo>& biases)
+                                                             const Optional<TensorInfo>& biases,
+                                                             const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInputInfo  = BuildArmComputeTensorInfo(input,  descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
@@ -59,13 +61,16 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
     const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
             descriptor.m_DilationX,descriptor.m_DilationY);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
                                                               &aclWeightsInfo,
                                                               optionalAclBiasesInfo,
                                                               &aclOutputInfo,
                                                               aclPadStrideInfo,
                                                               aclDepthMultiplier,
-                                                              arm_compute::ActivationLayerInfo(),
+                                                              activationInfo,
                                                               aclDilationInfo);
 }
 
@@ -116,16 +121,18 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
 
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     m_pDepthwiseConvolutionLayer = std::make_unique<arm_compute::NEDepthwiseConvolutionLayer>();
     static_cast<arm_compute::NEDepthwiseConvolutionLayer*>(
         m_pDepthwiseConvolutionLayer.get())->configure(&input,
-                                                     m_KernelTensor.get(),
-                                                     m_BiasTensor.get(),
-                                                     &output,
-                                                     padStrideInfo,
-                                                     depthMultiplier,
-                                                     arm_compute::ActivationLayerInfo(),
-                                                     aclDilationInfo);
+                                                       m_KernelTensor.get(),
+                                                       m_BiasTensor.get(),
+                                                       &output,
+                                                       padStrideInfo,
+                                                       depthMultiplier,
+                                                       activationInfo,
+                                                       aclDilationInfo);
 
     ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
 
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
index 85932d3f9a..d257b91638 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.hpp
@@ -19,7 +19,9 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
                                                              const TensorInfo& output,
                                                              const DepthwiseConvolution2dDescriptor& descriptor,
                                                              const TensorInfo& weights,
-                                                             const Optional<TensorInfo>& biases);
+                                                             const Optional<TensorInfo>& biases,
+                                                             const ActivationDescriptor* activationDescriptor
+                                                                 = nullptr);
 
 class NeonDepthwiseConvolutionWorkload : public BaseWorkload<DepthwiseConvolution2dQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fc353f136d..1a26d9510a 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -6,23 +6,31 @@
 #include "NeonDivisionWorkload.hpp"
 
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 namespace armnn
 {
 
 arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
-                                                const TensorInfo& input1,
-                                                const TensorInfo& output)
+                                                 const TensorInfo& input1,
+                                                 const TensorInfo& output,
+                                                 const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEElementwiseDivision::validate(&aclInput0,
-                                                       &aclInput1,
-                                                       &aclOutput);
+                                                        &aclInput1,
+                                                        &aclOutput,
+                                                        activationInfo);
 }
 
 NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descriptor,
@@ -35,7 +43,9 @@ NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descri
     arm_compute::ITensor& input1 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_DivLayer.configure(&input0, &input1, &output);
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+    m_DivLayer.configure(&input0, &input1, &output, activationInfo);
 }
 
 void NeonDivisionWorkload::Execute() const
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.hpp b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
index 2405d9a4ab..fffe02fc00 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.hpp
@@ -13,8 +13,9 @@ namespace armnn
 {
 
 arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo& input0,
-                                                const TensorInfo& input1,
-                                                const TensorInfo& output);
+                                                 const TensorInfo& input1,
+                                                 const TensorInfo& output,
+                                                 const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonDivisionWorkload : public BaseWorkload<DivisionQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index e808c60c0c..31489a0c32 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -6,9 +6,12 @@
 #include "NeonFullyConnectedWorkload.hpp"
 
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
 #include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h>
@@ -21,7 +24,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        const TensorInfo& output,
                                                        const TensorInfo& weights,
                                                        const TensorInfo& biases,
-                                                       const FullyConnectedDescriptor& descriptor)
+                                                       const FullyConnectedDescriptor& descriptor,
+                                                       const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output);
@@ -36,8 +40,7 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
     }
 
     const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo =
-        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor);
-
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor);
 
     return arm_compute::NEFullyConnectedLayer::validate(&aclInput,
                                                         &aclWeights,
@@ -64,9 +67,10 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
         BuildArmComputeTensor(*m_BiasesTensor, m_Data.m_Bias->GetTensorInfo());
     }
 
-    // Construct
-    arm_compute::FullyConnectedLayerInfo fc_info;
-    fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix;
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
+    arm_compute::FullyConnectedLayerInfo fc_info =
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
 
     auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
     layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
 
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
index 1cd8be109a..8dc7fdcd6c 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.hpp
@@ -21,7 +21,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
                                                        const TensorInfo& output,
                                                        const TensorInfo& weights,
                                                        const TensorInfo& biases,
-                                                       const FullyConnectedDescriptor& descriptor);
+                                                       const FullyConnectedDescriptor& descriptor,
+                                                       const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonFullyConnectedWorkload : public BaseWorkload<FullyConnectedQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index 6f78b8eacc..e4ed195922 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -7,6 +7,8 @@
 #include "NeonWorkloadUtils.hpp"
 
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
@@ -16,7 +18,8 @@ namespace armnn
 
 arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                        const TensorInfo& input1,
-                                                       const TensorInfo& output)
+                                                       const TensorInfo& output,
+                                                       const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
@@ -26,6 +29,9 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                      arm_compute::ConvertPolicy::SATURATE :
                                                      arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
@@ -34,7 +40,8 @@ arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                           &aclOutput,
                                                           1.0f,
                                                           convertPolicy,
-                                                          arm_compute::RoundingPolicy::TO_ZERO);
+                                                          arm_compute::RoundingPolicy::TO_ZERO,
+                                                          activationInfo);
 }
 
 NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
@@ -52,6 +59,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
                                                    arm_compute::ConvertPolicy::SATURATE :
                                                    arm_compute::ConvertPolicy::WRAP;
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it,
     // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be
     // ignored for F32 tensors.
@@ -61,7 +70,8 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
                      &output,
                      1.0f,
                      convertPolicy,
-                     arm_compute::RoundingPolicy::TO_ZERO);
+                     arm_compute::RoundingPolicy::TO_ZERO,
+                     activationInfo);
 
     m_PixelWiseMultiplication.reset(layer.release());
 }
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
index bfbaf776c1..d2bcd04482 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.hpp
@@ -8,6 +8,7 @@
 #include <backendsCommon/Workload.hpp>
 
 #include <arm_compute/core/Error.h>
+#include <armnn/Descriptors.hpp>
 #include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h>
 
@@ -16,7 +17,8 @@ namespace armnn
 {
 arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo& input0,
                                                        const TensorInfo& input1,
-                                                       const TensorInfo& output);
+                                                       const TensorInfo& output,
+                                                       const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonMultiplicationWorkload : public BaseWorkload<MultiplicationQueueDescriptor>
 {
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index ccc2bfe58b..21f0f6fa41 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -6,8 +6,12 @@
 #include "NeonSubtractionWorkload.hpp"
 #include "NeonWorkloadUtils.hpp"
+
 #include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <aclCommon/ArmComputeUtils.hpp>
+
 #include <armnn/utility/PolymorphicDowncast.hpp>
+
 #include <backendsCommon/CpuTensorHandle.hpp>
 
 #include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
@@ -17,16 +21,21 @@ namespace armnn
 
 arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
                                                     const TensorInfo& input1,
-                                                    const TensorInfo& output)
+                                                    const TensorInfo& output,
+                                                    const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInput0 = armcomputetensorutils::BuildArmComputeTensorInfo(input0);
     const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input1);
     const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
+            activationDescriptor);
+
     return arm_compute::NEArithmeticSubtraction::validate(&aclInput0,
                                                           &aclInput1,
                                                           &aclOutput,
-                                                          arm_compute::ConvertPolicy::SATURATE);
+                                                          arm_compute::ConvertPolicy::SATURATE,
+                                                          activationInfo);
 }
 
 NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescriptor& descriptor,
@@ -39,8 +48,10 @@ NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescripto
     arm_compute::ITensor& input2 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
+    const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
+
     auto layer = std::make_unique<arm_compute::NEArithmeticSubtraction>();
-    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE);
+    layer->configure(&input1, &input2, &output, arm_compute::ConvertPolicy::SATURATE, activationInfo);
     m_SubLayer.reset(layer.release());
 }
 
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
index 3326f8bf4a..19d0811a18 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.hpp
@@ -8,6 +8,7 @@
 #include <backendsCommon/Workload.hpp>
 
 #include <arm_compute/core/Error.h>
+#include <armnn/Descriptors.hpp>
 #include <arm_compute/runtime/IFunction.h>
 #include <arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h>
 
@@ -17,7 +18,8 @@ namespace armnn
 
 arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo& input0,
                                                     const TensorInfo& input1,
-                                                    const TensorInfo& output);
+                                                    const TensorInfo& output,
+                                                    const ActivationDescriptor* activationDescriptor = nullptr);
 
 class NeonSubtractionWorkload : public BaseWorkload<SubtractionQueueDescriptor>
 {
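
Closing note (annotation, not part of the patch): inside each workload
constructor the fused activation arrives through the queue descriptor's
additional-info field and is converted with
ConvertAdditionalInfoToAclActivationLayerInfo(descriptor), as the hunks
above show. When no activation was fused during optimization, that helper
yields an empty arm_compute::ActivationLayerInfo, so the configured
kernels behave exactly as they did before this change.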