From 07810fc2fcdd34db74222d90cc73ef12a88e7b78 Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Thu, 12 Nov 2020 10:58:48 +0000 Subject: IVGCVSW-5328-5329 Fuse Activation * Added Fused Activation Optimization to both CL and Neon backends. * Added Fused Activation support to all the CL and Neon workloads that support it. * Changed ProfilingTest network to be a Convolution layer followed by an Abs layer rather than an Activation layer. * Added IBackendInternal::OptimizeSubgraphView function that can accept a ModelOptions. * Network will now call OptimizeSubgraphView passing in the ModelOptions. Signed-off-by: Keith Davis Signed-off-by: Mike Kelly Signed-off-by: Teresa Charlin Change-Id: Ib536ac3cbafc7d9b35c139ad9a65b7735262cd9d --- src/backends/cl/workloads/ClAdditionWorkload.cpp | 15 ++++++++++++--- src/backends/cl/workloads/ClAdditionWorkload.hpp | 3 ++- .../ClBatchNormalizationFloatWorkload.cpp | 22 +++++++++++++++++----- .../ClBatchNormalizationFloatWorkload.hpp | 3 ++- .../cl/workloads/ClConvolution2dWorkload.cpp | 14 ++++++++++---- .../cl/workloads/ClConvolution2dWorkload.hpp | 3 ++- .../workloads/ClDepthwiseConvolutionWorkload.cpp | 14 +++++++++++--- .../workloads/ClDepthwiseConvolutionWorkload.hpp | 3 ++- .../cl/workloads/ClDivisionFloatWorkload.cpp | 19 ++++++++++++++----- .../cl/workloads/ClDivisionFloatWorkload.hpp | 3 ++- .../cl/workloads/ClFullyConnectedWorkload.cpp | 13 ++++++++----- .../cl/workloads/ClFullyConnectedWorkload.hpp | 3 ++- .../cl/workloads/ClMultiplicationWorkload.cpp | 20 ++++++++++++++++---- .../cl/workloads/ClMultiplicationWorkload.hpp | 3 ++- .../cl/workloads/ClSubtractionWorkload.cpp | 16 +++++++++++++--- .../cl/workloads/ClSubtractionWorkload.hpp | 3 ++- 16 files changed, 117 insertions(+), 40 deletions(-) (limited to 'src/backends/cl/workloads') diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp index 18e2400ccd..7e75a04110 100644 --- a/src/backends/cl/workloads/ClAdditionWorkload.cpp +++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp @@ -8,6 +8,7 @@ #include #include #include +#include #include "ClWorkloadUtils.hpp" @@ -26,7 +27,10 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); + + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo); } void ClAdditionWorkload::Execute() const @@ -37,16 +41,21 @@ void ClAdditionWorkload::Execute() const arm_compute::Status ClAdditionValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output) + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + const arm_compute::Status aclStatus = arm_compute::CLArithmeticAddition::validate(&aclInput0Info, &aclInput1Info, &aclOutputInfo, - g_AclConvertPolicy); + g_AclConvertPolicy, + activationInfo); return aclStatus; } diff --git a/src/backends/cl/workloads/ClAdditionWorkload.hpp b/src/backends/cl/workloads/ClAdditionWorkload.hpp index 62bd0ae20b..372c4bc6f7 100644 --- a/src/backends/cl/workloads/ClAdditionWorkload.hpp +++ b/src/backends/cl/workloads/ClAdditionWorkload.hpp @@ -25,5 +25,6 @@ private: arm_compute::Status ClAdditionValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output); + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor = nullptr); } //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp index fa0be85100..68942e2a01 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp @@ -4,12 +4,16 @@ // #include "ClBatchNormalizationFloatWorkload.hpp" +#include "ClWorkloadUtils.hpp" + #include + #include + #include -#include +#include -#include "ClWorkloadUtils.hpp" +#include namespace armnn { @@ -21,7 +25,8 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, const TensorInfo& var, const TensorInfo& beta, const TensorInfo& gamma, - const BatchNormalizationDescriptor &desc) + const BatchNormalizationDescriptor& desc, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout); @@ -36,13 +41,17 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, const arm_compute::TensorInfo aclGammaInfo = armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + return arm_compute::CLBatchNormalizationLayer::validate(&aclInputInfo, &aclOutputInfo, &aclMeanInfo, &aclVarInfo, &aclBetaInfo, &aclGammaInfo, - desc.m_Eps); + desc.m_Eps, + activationInfo); } ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( @@ -70,13 +79,16 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + m_Layer.configure(&input, &output, m_Mean.get(), m_Variance.get(), m_Beta.get(), m_Gamma.get(), - m_Data.m_Parameters.m_Eps); + m_Data.m_Parameters.m_Eps, + activationInfo); InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean); InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance); diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp index e94bef20ac..ef5778309e 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp @@ -19,7 +19,8 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input, const TensorInfo& var, const TensorInfo& beta, const TensorInfo& gamma, - const BatchNormalizationDescriptor& desc); + const BatchNormalizationDescriptor& desc, + const ActivationDescriptor* activationDescriptor = nullptr); class ClBatchNormalizationFloatWorkload : public FloatWorkload { diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index 7b52f2784f..50cb9ded37 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -25,7 +25,8 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, const Convolution2dDescriptor& descriptor, const TensorInfo& weights, const Optional& biases, - bool isFastMathEnabled) + bool isFastMathEnabled, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); @@ -47,6 +48,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, arm_compute::PadStrideInfo layerInfo = BuildArmComputePadStrideInfo(descriptor); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + return arm_compute::CLConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, optionalAclBiasesInfo, @@ -54,7 +58,7 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, layerInfo, arm_compute::WeightsInfo(), aclDilationInfo, - arm_compute::ActivationLayerInfo(), + activationInfo, isFastMathEnabled); } @@ -91,6 +95,8 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + m_ConvolutionLayer.configure(&input, m_KernelTensor.get(), m_BiasTensor.get(), @@ -98,7 +104,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip padStrideInfo, arm_compute::WeightsInfo(), aclDilationInfo, - arm_compute::ActivationLayerInfo(), + activationInfo, isFastMathEnabled); m_ConvolutionMethod = @@ -107,7 +113,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip output.info(), padStrideInfo, arm_compute::WeightsInfo(), - arm_compute::ActivationLayerInfo(), + activationInfo, arm_compute::CLScheduler::get().target(), aclDilationInfo, isFastMathEnabled); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp index f769422a0a..70170b569d 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp @@ -23,7 +23,8 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, const Convolution2dDescriptor& descriptor, const TensorInfo& weights, const Optional& biases, - bool isFastMathEnabled = false); + bool isFastMathEnabled = false, + const ActivationDescriptor* activationDescriptor = nullptr); class ClConvolution2dWorkload : public BaseWorkload { diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp index 8704b1276f..53f16848eb 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp @@ -8,11 +8,13 @@ #include #include "ClWorkloadUtils.hpp" +#include #include #include #include #include #include +#include #include @@ -25,7 +27,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp const TensorInfo& output, const DepthwiseConvolution2dDescriptor& descriptor, const TensorInfo& weights, - const Optional& biases) + const Optional& biases, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); @@ -56,13 +59,16 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp descriptor.m_DilationX, descriptor.m_DilationY); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + return arm_compute::CLDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, optionalAclBiasesInfo, &aclOutputInfo, aclPadStrideInfo, aclDepthMultiplier, - arm_compute::ActivationLayerInfo(), + activationInfo, aclDilationInfo); } @@ -114,6 +120,8 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + m_DepthwiseConvolutionLayer = std::make_unique(); static_cast(m_DepthwiseConvolutionLayer.get())->configure( &input, @@ -122,7 +130,7 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( &output, padStrideInfo, depthMultiplier, - arm_compute::ActivationLayerInfo(), + activationInfo, aclDilationInfo); ARMNN_ASSERT(m_DepthwiseConvolutionLayer); diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp index fc277b9947..c75913737d 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.hpp @@ -18,7 +18,8 @@ arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo& inp const TensorInfo& output, const DepthwiseConvolution2dDescriptor& descriptor, const TensorInfo& weights, - const Optional& biases); + const Optional& biases, + const ActivationDescriptor* activationDescriptor = nullptr); class ClDepthwiseConvolutionWorkload : public BaseWorkload { diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp index 2a27f8a9bc..c79e55ebdd 100644 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.cpp @@ -4,9 +4,12 @@ // #include "ClDivisionFloatWorkload.hpp" -#include + +#include #include +#include + #include "ClWorkloadUtils.hpp" namespace armnn @@ -14,13 +17,17 @@ namespace armnn arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output) + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); const arm_compute::TensorInfo aclOutput = armcomputetensorutils::BuildArmComputeTensorInfo(output); - return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + + return arm_compute::CLArithmeticDivision::validate(&aclInput1, &aclInput2, &aclOutput, activationInfo); } @@ -33,8 +40,10 @@ ClDivisionFloatWorkload::ClDivisionFloatWorkload(const DivisionQueueDescriptor& arm_compute::ICLTensor& input0 = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - // Construct - m_ArithmeticDivision.configure(&input0, &input1, &output); + + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + + m_ArithmeticDivision.configure(&input0, &input1, &output, activationInfo); } void ClDivisionFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp index ddca87d78a..71d27ed5b5 100644 --- a/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClDivisionFloatWorkload.hpp @@ -14,7 +14,8 @@ namespace armnn arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output); + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor = nullptr); class ClDivisionFloatWorkload : public FloatWorkload { diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp index 60eb138b42..eaec639f28 100644 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp @@ -20,7 +20,8 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor) + const FullyConnectedDescriptor& descriptor, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInput = BuildArmComputeTensorInfo(input); const arm_compute::TensorInfo aclOutput = BuildArmComputeTensorInfo(output); @@ -35,7 +36,7 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, } const arm_compute::FullyConnectedLayerInfo fullyConnectedLayerInfo = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor); + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor, activationDescriptor); return arm_compute::CLFullyConnectedLayer::validate(&aclInput, &aclWeights, @@ -63,9 +64,11 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(const FullyConnectedQueueDesc arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - // Construct - arm_compute::FullyConnectedLayerInfo fc_info; - fc_info.transpose_weights = m_Data.m_Parameters.m_TransposeWeightMatrix; + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + + arm_compute::FullyConnectedLayerInfo fc_info = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo); + m_FullyConnectedLayer.configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp index e13436eaa5..311b59498b 100644 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.hpp @@ -19,7 +19,8 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input, const TensorInfo& output, const TensorInfo& weights, const TensorInfo& biases, - const FullyConnectedDescriptor& descriptor); + const FullyConnectedDescriptor& descriptor, + const ActivationDescriptor* activationDescriptor = nullptr); class ClFullyConnectedWorkload : public armnn::BaseWorkload { diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp index e9b75c3f10..46a1c4bc59 100644 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp +++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp @@ -4,8 +4,12 @@ // #include "ClMultiplicationWorkload.hpp" -#include + +#include #include + +#include + #include "ClWorkloadUtils.hpp" namespace armnn @@ -13,7 +17,8 @@ namespace armnn arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output) + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInput1 = armcomputetensorutils::BuildArmComputeTensorInfo(input0); const arm_compute::TensorInfo aclInput2 = armcomputetensorutils::BuildArmComputeTensorInfo(input1); @@ -23,6 +28,9 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, arm_compute::ConvertPolicy::SATURATE : arm_compute::ConvertPolicy::WRAP; + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + // At the time of writing, configure() will fail if a rounding policy other than TO_ZERO is supplied to it, // when providing a scale of 1.0 for F32 tensors, even though the provided rounding policy appears to be // ignored for F32 tensors. @@ -31,7 +39,8 @@ arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, &aclOutput, 1.0f, convertPolicy, - arm_compute::RoundingPolicy::TO_ZERO); + arm_compute::RoundingPolicy::TO_ZERO, + activationInfo); } @@ -50,13 +59,16 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc arm_compute::ConvertPolicy::SATURATE : arm_compute::ConvertPolicy::WRAP; + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + // Construct m_PixelWiseMultiplication.configure(&input0, &input1, &output, 1.0f, convertPolicy, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN); + arm_compute::RoundingPolicy::TO_NEAREST_EVEN, + activationInfo); } void ClMultiplicationWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp index 732bb16dcc..461449cc35 100644 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.hpp +++ b/src/backends/cl/workloads/ClMultiplicationWorkload.hpp @@ -14,7 +14,8 @@ namespace armnn arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output); + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor = nullptr); class ClMultiplicationWorkload : public BaseWorkload { diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp index 38154eb4d7..c9fb556383 100644 --- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp +++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp @@ -7,9 +7,11 @@ #include #include +#include #include #include "ClWorkloadUtils.hpp" +#include "../../../../include/armnn/ArmNN.hpp" namespace armnn { @@ -26,7 +28,10 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d arm_compute::ICLTensor& input0 = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& input1 = static_cast(this->m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy); + + const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); + + m_Layer.configure(&input0, &input1, &output, g_AclConvertPolicy, activationInfo); } void ClSubtractionWorkload::Execute() const @@ -37,16 +42,21 @@ void ClSubtractionWorkload::Execute() const arm_compute::Status ClSubtractionValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output) + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor) { const arm_compute::TensorInfo aclInput0Info = BuildArmComputeTensorInfo(input0); const arm_compute::TensorInfo aclInput1Info = BuildArmComputeTensorInfo(input1); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output); + const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( + activationDescriptor); + const arm_compute::Status aclStatus = arm_compute::CLArithmeticSubtraction::validate(&aclInput0Info, &aclInput1Info, &aclOutputInfo, - g_AclConvertPolicy); + g_AclConvertPolicy, + activationInfo); return aclStatus; } diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.hpp b/src/backends/cl/workloads/ClSubtractionWorkload.hpp index da6d17c6ac..9f51de645b 100644 --- a/src/backends/cl/workloads/ClSubtractionWorkload.hpp +++ b/src/backends/cl/workloads/ClSubtractionWorkload.hpp @@ -25,5 +25,6 @@ private: arm_compute::Status ClSubtractionValidate(const TensorInfo& input0, const TensorInfo& input1, - const TensorInfo& output); + const TensorInfo& output, + const ActivationDescriptor* activationDescriptor = nullptr); } //namespace armnn -- cgit v1.2.1