From 9f6862de94e3d15ea5207a5747012f6c7eead358 Mon Sep 17 00:00:00 2001
From: Kevin May
Date: Fri, 22 Oct 2021 15:42:28 +0100
Subject: IVGCVSW-6440 Add profiling around CL workload configure calls

Signed-off-by: Kevin May
Change-Id: I7626d5bd82e832d5be6913719a34d76fbd1dbed8
---
 src/backends/cl/workloads/ClAbsWorkload.cpp        |  6 ++--
 src/backends/cl/workloads/ClActivationWorkload.cpp |  5 ++-
 src/backends/cl/workloads/ClAdditionWorkload.cpp   |  6 ++--
 src/backends/cl/workloads/ClArgMinMaxWorkload.cpp  | 23 +++++++------
 .../ClBatchNormalizationFloatWorkload.cpp          | 21 +++++++-----
 .../cl/workloads/ClBatchToSpaceNdWorkload.cpp      |  5 ++-
 src/backends/cl/workloads/ClCastWorkload.cpp       |  5 ++-
 .../cl/workloads/ClChannelShuffleWorkload.cpp      |  5 ++-
 src/backends/cl/workloads/ClComparisonWorkload.cpp |  5 ++-
 src/backends/cl/workloads/ClConcatWorkload.cpp     |  9 +++--
 .../cl/workloads/ClConvertFp16ToFp32Workload.cpp   |  5 ++-
 .../cl/workloads/ClConvertFp32ToFp16Workload.cpp   |  5 ++-
 .../cl/workloads/ClConvolution3dWorkload.cpp       | 16 +++++----
 .../cl/workloads/ClDepthToSpaceWorkload.cpp        |  5 ++-
 .../workloads/ClDepthwiseConvolutionWorkload.cpp   | 23 +++++++------
 src/backends/cl/workloads/ClDequantizeWorkload.cpp |  5 ++-
 src/backends/cl/workloads/ClDivisionWorkload.cpp   |  5 ++-
 src/backends/cl/workloads/ClExpWorkload.cpp        |  5 ++-
 src/backends/cl/workloads/ClFillWorkload.cpp       |  5 ++-
 src/backends/cl/workloads/ClFloorFloatWorkload.cpp |  5 ++-
 .../cl/workloads/ClFullyConnectedWorkload.cpp      | 15 +++++----
 src/backends/cl/workloads/ClGatherWorkload.cpp     |  5 ++-
 .../workloads/ClInstanceNormalizationWorkload.cpp  | 15 +++++----
 .../workloads/ClL2NormalizationFloatWorkload.cpp   |  5 ++-
 src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp |  6 +++-
 src/backends/cl/workloads/ClLogWorkload.cpp        |  5 ++-
 src/backends/cl/workloads/ClLogicalAndWorkload.cpp |  5 ++-
 src/backends/cl/workloads/ClLogicalNotWorkload.cpp |  5 ++-
 src/backends/cl/workloads/ClLogicalOrWorkload.cpp  |  5 ++-
 src/backends/cl/workloads/ClLstmFloatWorkload.cpp  | 19 ++++++-----
 src/backends/cl/workloads/ClMaximumWorkload.cpp    |  5 ++-
 src/backends/cl/workloads/ClMeanWorkload.cpp       |  5 ++-
 src/backends/cl/workloads/ClMinimumWorkload.cpp    |  5 ++-
 .../cl/workloads/ClMultiplicationWorkload.cpp      | 21 +++++++-----
 src/backends/cl/workloads/ClNegWorkload.cpp        |  5 ++-
 .../cl/workloads/ClNormalizationFloatWorkload.cpp  |  5 ++-
 src/backends/cl/workloads/ClPadWorkload.cpp        |  5 ++-
 src/backends/cl/workloads/ClPermuteWorkload.cpp    |  7 ++--
 src/backends/cl/workloads/ClPooling2dWorkload.cpp  |  7 ++--
 src/backends/cl/workloads/ClPreluWorkload.cpp      |  5 ++-
 src/backends/cl/workloads/ClQLstmWorkload.cpp      | 39 ++++++++++++----------
 src/backends/cl/workloads/ClQuantizeWorkload.cpp   |  5 ++-
 .../cl/workloads/ClQuantizedLstmWorkload.cpp       | 20 ++++++-----
 src/backends/cl/workloads/ClReduceWorkload.cpp     | 13 +++++---
 src/backends/cl/workloads/ClReshapeWorkload.cpp    |  5 ++-
 src/backends/cl/workloads/ClResizeWorkload.cpp     | 21 +++++++-----
 src/backends/cl/workloads/ClRsqrtWorkload.cpp      |  5 ++-
 src/backends/cl/workloads/ClSinWorkload.cpp        |  5 ++-
 src/backends/cl/workloads/ClSliceWorkload.cpp      |  5 ++-
 src/backends/cl/workloads/ClSoftmaxWorkload.cpp    |  5 ++-
 .../cl/workloads/ClSpaceToBatchNdWorkload.cpp      | 17 ++++++----
 .../cl/workloads/ClSpaceToDepthWorkload.cpp        |  5 ++-
 src/backends/cl/workloads/ClSplitterWorkload.cpp   |  5 ++-
 src/backends/cl/workloads/ClStackWorkload.cpp      |  5 ++-
 .../cl/workloads/ClStridedSliceWorkload.cpp        | 21 +++++++-----
 .../cl/workloads/ClSubtractionWorkload.cpp         |  5 ++-
 .../workloads/ClTransposeConvolution2dWorkload.cpp |  6 +++-
 src/backends/cl/workloads/ClTransposeWorkload.cpp  | 13 +++++---
 58 files changed, 349 insertions(+), 175 deletions(-)

diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index fa8e4f737f..eeaec54439 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -33,8 +33,10 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
-    m_AbsLayer.configure(clCompileContext, &input, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClAbsWorkload_configure");
+        m_AbsLayer.configure(clCompileContext, &input, &output);
+    }
 }
 
 void ClAbsWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index 20a65b680e..229a291026 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -47,7 +47,10 @@ ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& desc
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClActivationWorkload_configure");
+        m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
+    }
 }
 
 void ClActivationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 9bef0603e1..55957d794a 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -30,8 +30,10 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
-
-    m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClAdditionWorkload_configure");
+        m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+    }
 }
 
 void ClAdditionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 78646a7f86..0bfb4e219b 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -70,17 +70,20 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescrip
     auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis);
     int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
 
-    if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max)
     {
-        m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX);
-    }
-    else
-    {
-        m_ArgMinMaxLayer.configure(clCompileContext,
-                                   &input,
-                                   aclAxis,
-                                   &output,
-                                   arm_compute::ReductionOperation::ARG_IDX_MIN);
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClArgMinMaxWorkload_configure");
+        if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max)
+        {
+            m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX);
+        }
+        else
+        {
+            m_ArgMinMaxLayer.configure(clCompileContext,
+                                       &input,
+                                       aclAxis,
+                                       &output,
+                                       arm_compute::ReductionOperation::ARG_IDX_MIN);
+        }
     }
 }
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 8367d7e266..fba1679a29 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -86,15 +86,18 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
-    m_Layer.configure(clCompileContext,
-                      &input,
-                      &output,
-                      m_Mean.get(),
-                      m_Variance.get(),
-                      m_Beta.get(),
-                      m_Gamma.get(),
-                      m_Data.m_Parameters.m_Eps,
-                      activationInfo);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClBatchNormalizationFloatWorkload_configure");
+        m_Layer.configure(clCompileContext,
+                          &input,
+                          &output,
+                          m_Mean.get(),
+                          m_Variance.get(),
+                          m_Beta.get(),
+                          m_Gamma.get(),
+                          m_Data.m_Parameters.m_Eps,
+                          activationInfo);
+    }
 
     InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
     InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index 8eef587644..28b408d0a2 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -42,7 +42,10 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
 
-    m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClBatchToSpaceNdWorkload_configure");
+        m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
+    }
 }
 
 void ClBatchToSpaceNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClCastWorkload.cpp b/src/backends/cl/workloads/ClCastWorkload.cpp
index 07b76dc064..9606385720 100644
--- a/src/backends/cl/workloads/ClCastWorkload.cpp
+++ b/src/backends/cl/workloads/ClCastWorkload.cpp
@@ -35,7 +35,10 @@ ClCastWorkload::ClCastWorkload(const CastQueueDescriptor& descriptor,
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_CastLayer.configure(clCompileContext, &input, &output, g_AclConvertPolicy);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClCastWorkload_configure");
+        m_CastLayer.configure(clCompileContext, &input, &output, g_AclConvertPolicy);
+    }
 }
 
 void ClCastWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp b/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
index 751056a9a0..5d3e66c782 100644
--- a/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
+++ b/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
@@ -86,7 +86,10 @@ ClChannelShuffleWorkload::ClChannelShuffleWorkload(const ChannelShuffleQueueDesc
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
 
-    m_ChannelShuffleLayer.configure(clCompileContext, &input, &output, descriptor.m_Parameters.m_NumGroups);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClChannelShuffleWorkload_configure");
+        m_ChannelShuffleLayer.configure(clCompileContext, &input, &output, descriptor.m_Parameters.m_NumGroups);
+    }
 }
 
 void ClChannelShuffleWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index d83682d81b..3d59e08475 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -58,7 +58,10 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc
     const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(m_Data.m_Parameters);
 
-    m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClComparisonWorkload_configure");
+        m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
+    }
 }
 
 void ClComparisonWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index 233fd19542..58983c8896 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -88,9 +88,12 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
     // Create the layer function
     auto layer = std::make_unique<arm_compute::CLConcatenateLayer>();
 
-    // Configure input and output tensors
-    size_t aclAxis = CalcAxis(descriptor.m_Parameters);
-    layer->configure(clCompileContext, aclInputs, &output, aclAxis);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConcatWorkload_configure");
+        // Configure input and output tensors
+        size_t aclAxis = CalcAxis(descriptor.m_Parameters);
+        layer->configure(clCompileContext, aclInputs, &output, aclAxis);
+    }
 
     // Prepare
     layer->prepare();
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index 455ec1af13..ccea7c84b8 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -25,7 +25,10 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp16ToFp32Workload_configure");
+        m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+    }
 }
 
 void ClConvertFp16ToFp32Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index 8e6b0cea67..9b38b22019 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -25,7 +25,10 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined,
"ClConvertFp32ToFp16Workload_configure"); + m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); + } } void ClConvertFp32ToFp16Workload::Execute() const diff --git a/src/backends/cl/workloads/ClConvolution3dWorkload.cpp b/src/backends/cl/workloads/ClConvolution3dWorkload.cpp index 18a2c31b51..baa2f05909 100644 --- a/src/backends/cl/workloads/ClConvolution3dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution3dWorkload.cpp @@ -83,13 +83,15 @@ ClConvolution3dWorkload::ClConvolution3dWorkload(const Convolution3dQueueDescrip const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor, isFastMathEnabled); - m_ConvolutionLayer.configure(clCompileContext, - &input, - &weights, - biasesPtr, - &output, - aclConv3DInfo); - + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution3dWorkload_configure"); + m_ConvolutionLayer.configure(clCompileContext, + &input, + &weights, + biasesPtr, + &output, + aclConv3DInfo); + } // Add details for profiling output WorkloadInfo detailsInfo; diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp index aeab0293c1..75a87c7000 100644 --- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp @@ -61,7 +61,10 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor PolymorphicPointerDowncast(m_Data.m_Outputs[0])->GetTensor(); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(clCompileContext, &input, &output, blockSize); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthToSpaceWorkload_configure"); + m_Layer.configure(clCompileContext, &input, &output, blockSize); + } } void ClDepthToSpaceWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp index 9592b37f9d..91c0018c93 100644 --- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp +++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp @@ -135,17 +135,20 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload( const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); m_DepthwiseConvolutionLayer = std::make_unique(); - static_cast(m_DepthwiseConvolutionLayer.get())->configure( - clCompileContext, - &input, - m_KernelTensor.get(), - m_BiasTensor.get(), - &output, - padStrideInfo, - depthMultiplier, - activationInfo, - aclDilationInfo); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthwiseConvolutionWorkload_configure"); + static_cast(m_DepthwiseConvolutionLayer.get())->configure( + clCompileContext, + &input, + m_KernelTensor.get(), + m_BiasTensor.get(), + &output, + padStrideInfo, + depthMultiplier, + activationInfo, + aclDilationInfo); + } ARMNN_ASSERT(m_DepthwiseConvolutionLayer); ScopedTensorHandle weightsPermutedHandle(weightPermuted); diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp index 6bdeaa8fec..00d849c603 100644 --- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp +++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp @@ -41,7 +41,10 @@ ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& desc m_Data.m_Outputs[0])->GetTensor(); m_Layer.reset(new arm_compute::CLDequantizationLayer()); - m_Layer->configure(clCompileContext, &input, &output); + { + 
ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDequantizeWorkload_configure"); + m_Layer->configure(clCompileContext, &input, &output); + } m_Layer->prepare(); } diff --git a/src/backends/cl/workloads/ClDivisionWorkload.cpp b/src/backends/cl/workloads/ClDivisionWorkload.cpp index d444a192cb..5df4c61bf5 100644 --- a/src/backends/cl/workloads/ClDivisionWorkload.cpp +++ b/src/backends/cl/workloads/ClDivisionWorkload.cpp @@ -44,7 +44,10 @@ ClDivisionWorkload::ClDivisionWorkload(const DivisionQueueDescriptor& descriptor const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDivisionWorkload_configure"); + m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo); + } } void ClDivisionWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp index 9c1f0368a3..eeb6637705 100644 --- a/src/backends/cl/workloads/ClExpWorkload.cpp +++ b/src/backends/cl/workloads/ClExpWorkload.cpp @@ -39,7 +39,10 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, arm_compute::ICLTensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_ExpLayer.configure(clCompileContext, &input, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClExpWorkload_configure"); + m_ExpLayer.configure(clCompileContext, &input, &output); + } } void ClExpWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp index ea42dcfc8b..2f95bc564c 100644 --- a/src/backends/cl/workloads/ClFillWorkload.cpp +++ b/src/backends/cl/workloads/ClFillWorkload.cpp @@ -31,7 +31,10 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor, arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); arm_compute::PixelValue pixelValue = GetPixelValue(output.info(), descriptor.m_Parameters.m_Value); - m_Layer.configure(clCompileContext, &output, pixelValue); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFillWorkload_configure"); + m_Layer.configure(clCompileContext, &output, pixelValue); + } } void ClFillWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp index d2b487169e..5db8cc6a7d 100644 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp @@ -30,7 +30,10 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_Layer.configure(clCompileContext, &input, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFloorFloatWorkload_configure"); + m_Layer.configure(clCompileContext, &input, &output); + } } void ClFloorFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp index a0889e1b60..cc4ce9082c 100644 --- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp +++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp @@ -88,12 
+88,15 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload( arm_compute::FullyConnectedLayerInfo fc_info = ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo); - m_FullyConnectedLayer.configure(clCompileContext, - &input, - m_WeightsTensor.get(), - m_BiasesTensor.get(), - &output, - fc_info); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFullyConnectedWorkload_configure"); + m_FullyConnectedLayer.configure(clCompileContext, + &input, + m_WeightsTensor.get(), + m_BiasesTensor.get(), + &output, + fc_info); + } InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight); diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp index 7c8d1ab787..b2341b8f32 100644 --- a/src/backends/cl/workloads/ClGatherWorkload.cpp +++ b/src/backends/cl/workloads/ClGatherWorkload.cpp @@ -45,7 +45,10 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor, int aclAxis = ComputeAclAxis(descriptor.m_Parameters.m_Axis, info.m_InputTensorInfos[0]); - m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClGatherWorkload_configure"); + m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis); + } }; void ClGatherWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp index a4f20c5b6c..58e65ddab7 100644 --- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp +++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp @@ -50,12 +50,15 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload( input.info()->set_data_layout(aclDataLayout); output.info()->set_data_layout(aclDataLayout); - m_Layer.configure(clCompileContext, - &input, - &output, - descriptor.m_Parameters.m_Gamma, - descriptor.m_Parameters.m_Beta, - descriptor.m_Parameters.m_Eps); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClInstanceNormalizationWorkload_configure"); + m_Layer.configure(clCompileContext, + &input, + &output, + descriptor.m_Parameters.m_Gamma, + descriptor.m_Parameters.m_Beta, + descriptor.m_Parameters.m_Eps); + } }; void ClInstanceNormalizationWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp index 953ff4aa9f..3b20ace1ed 100644 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp @@ -48,7 +48,10 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 
2 : 0; - m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClL2NormalizationFloatWorkload_configure"); + m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps); + } } void ClL2NormalizationFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp index 6c032111db..b75c6b0266 100644 --- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp +++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp @@ -44,7 +44,11 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]); - m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis); + + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogSoftmaxWorkload_configure"); + m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis); + } } void ClLogSoftmaxWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogWorkload.cpp b/src/backends/cl/workloads/ClLogWorkload.cpp index 180c0afd00..d13a0eaa3f 100644 --- a/src/backends/cl/workloads/ClLogWorkload.cpp +++ b/src/backends/cl/workloads/ClLogWorkload.cpp @@ -33,7 +33,10 @@ ClLogWorkload::ClLogWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, arm_compute::ICLTensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_LogLayer.configure(clCompileContext, &input, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogWorkload_configure"); + m_LogLayer.configure(clCompileContext, &input, &output); + } } void ClLogWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp index 30a187be8a..481d87c4ff 100644 --- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp @@ -48,7 +48,10 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d arm_compute::ICLTensor& input1 = PolymorphicDowncast(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalAndWorkload_configure"); + m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output); + } } void ClLogicalAndWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp index 4e95fcd266..c61f8443b7 100644 --- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp @@ -44,7 +44,10 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor arm_compute::ICLTensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalNotLayer.configure(clCompileContext, &input, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalNotWorkload_configure"); + 
m_LogicalNotLayer.configure(clCompileContext, &input, &output); + } } void ClLogicalNotWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp index b4eb11cb4d..307af2086a 100644 --- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp +++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp @@ -48,7 +48,10 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des arm_compute::ICLTensor& input1 = PolymorphicDowncast(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalOrWorkload_configure"); + m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output); + } } void ClLogicalOrWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp index 709b14528e..9cbbff3dd9 100644 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp @@ -193,14 +193,17 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor, throw armnn::Exception("Wrong Type of Activation Function!"); } - m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(), - m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), - m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(), - m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(), - m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in, - &cell_state_in, m_ScratchBuffer.get(), &output_state_out, - &cell_state_out, &output, lstm_param, activationLayerInfo, - cell_threshold, projection_threshold); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLstmFloatWorkload_configure"); + m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(), + m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(), + m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(), + m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(), + m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in, + &cell_state_in, m_ScratchBuffer.get(), &output_state_out, + &cell_state_out, &output, lstm_param, activationLayerInfo, + cell_threshold, projection_threshold); + } armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer); diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp index 5a19c6949c..f10c609ef9 100644 --- a/src/backends/cl/workloads/ClMaximumWorkload.cpp +++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp @@ -47,7 +47,10 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor, arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMaximumWorkload_configure"); + m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output); + } } void ClMaximumWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp index cd79d04612..074b4b2061 
100644 --- a/src/backends/cl/workloads/ClMeanWorkload.cpp +++ b/src/backends/cl/workloads/ClMeanWorkload.cpp @@ -47,7 +47,10 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor, info.m_InputTensorInfos[0].GetNumDimensions(), m_Data.m_Parameters.m_Axis); - m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMeanWorkload_configure"); + m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output); + } } void ClMeanWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp index 22e928763d..d29dcc2950 100644 --- a/src/backends/cl/workloads/ClMinimumWorkload.cpp +++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp @@ -47,7 +47,10 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor, arm_compute::ICLTensor& input1 = static_cast(m_Data.m_Inputs[1])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMinimumWorkload_configure"); + m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output); + } } void ClMinimumWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp index b0b71ce3f5..e19a7a24b6 100644 --- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp +++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp @@ -62,15 +62,18 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - // Construct - m_PixelWiseMultiplication.configure(clCompileContext, - &input0, - &input1, - &output, - 1.0f, - convertPolicy, - arm_compute::RoundingPolicy::TO_NEAREST_EVEN, - activationInfo); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMultiplicationWorkload_configure"); + // Construct + m_PixelWiseMultiplication.configure(clCompileContext, + &input0, + &input1, + &output, + 1.0f, + convertPolicy, + arm_compute::RoundingPolicy::TO_NEAREST_EVEN, + activationInfo); + } } void ClMultiplicationWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp index fb5b040dec..c606189e83 100644 --- a/src/backends/cl/workloads/ClNegWorkload.cpp +++ b/src/backends/cl/workloads/ClNegWorkload.cpp @@ -33,7 +33,10 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, arm_compute::ICLTensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); - m_NegLayer.configure(clCompileContext, &input, &output); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClNegWorkload_configure"); + m_NegLayer.configure(clCompileContext, &input, &output); + } } void ClNegWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp index 9c6e0a1e97..9234a8a88b 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp @@ -50,7 +50,10 @@ 
ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters); - m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClNormalizationFloatWorkload_configure"); + m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo); + } }; void ClNormalizationFloatWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp index 46975102db..48d61b0b8a 100644 --- a/src/backends/cl/workloads/ClPadWorkload.cpp +++ b/src/backends/cl/workloads/ClPadWorkload.cpp @@ -41,7 +41,10 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, arm_compute::PixelValue pixelValue = GetPixelValue(input.info(), descriptor.m_Parameters.m_PadValue); - m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPadWorkload_configure"); + m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue); + } } void ClPadWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp index c7efe7a7ed..641e871d50 100644 --- a/src/backends/cl/workloads/ClPermuteWorkload.cpp +++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp @@ -45,8 +45,11 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings; - // Run the layer. - m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings)); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPermuteWorkload_configure"); + // Run the layer. + m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings)); + } } void ClPermuteWorkload::Execute() const diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp index ff441ef915..f967c6dd39 100644 --- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp +++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp @@ -55,8 +55,11 @@ ClPooling2dWorkload::ClPooling2dWorkload( arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision); - // Run the layer. - m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo); + { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPooling2dWorkload_configure"); + // Run the layer. 
+        m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
+    }
 }
 
 void ClPooling2dWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index beb9e43573..449e4de48c 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -37,7 +37,10 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
     arm_compute::ICLTensor& alpha  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPreluWorkload_configure");
+        m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
+    }
 }
 
 void ClPreluWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index d7c7af7e10..b2c1d6d63b 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -158,24 +158,27 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
                               m_Data.m_Parameters.m_CellIntermediateScale,
                               m_Data.m_Parameters.m_OutputIntermediateScale);
 
-    // QLSTM CL configure
-    m_QLstmLayer.configure(clCompileContext,
-                           &input,
-                           m_InputToForgetWeightsTensor.get(),
-                           m_InputToCellWeightsTensor.get(),
-                           m_InputToOutputWeightsTensor.get(),
-                           m_RecurrentToForgetWeightsTensor.get(),
-                           m_RecurrentToCellWeightsTensor.get(),
-                           m_RecurrentToOutputWeightsTensor.get(),
-                           m_ForgetGateBiasTensor.get(),
-                           m_CellBiasTensor.get(),
-                           m_OutputGateBiasTensor.get(),
-                           &cellStateIn,
-                           &outputStateIn,
-                           &cellStateOut,
-                           &outputStateOut,
-                           &output,
-                           qLstmParams);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQLstmWorkload_configure");
+        // QLSTM CL configure
+        m_QLstmLayer.configure(clCompileContext,
+                               &input,
+                               m_InputToForgetWeightsTensor.get(),
+                               m_InputToCellWeightsTensor.get(),
+                               m_InputToOutputWeightsTensor.get(),
+                               m_RecurrentToForgetWeightsTensor.get(),
+                               m_RecurrentToCellWeightsTensor.get(),
+                               m_RecurrentToOutputWeightsTensor.get(),
+                               m_ForgetGateBiasTensor.get(),
+                               m_CellBiasTensor.get(),
+                               m_OutputGateBiasTensor.get(),
+                               &cellStateIn,
+                               &outputStateIn,
+                               &cellStateOut,
+                               &outputStateOut,
+                               &output,
+                               qLstmParams);
+    }
 
     // Initialise ACL tensor data for mandatory params
     InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index dc668fd6b4..5321e6292a 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -39,7 +39,10 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(clCompileContext, &input, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQuantizeWorkload_configure");
+        m_Layer.configure(clCompileContext, &input, &output);
+    }
 }
 
 void ClQuantizeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index 7bacf70a6a..05ae89c93b 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -109,14 +109,18 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip
     arm_compute::ICLTensor& cellStateOutTensor   = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
 
-    m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
-                                   m_InputToForgetWeightsTensor.get(),
-                                   m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
-                                   m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
-                                   m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
-                                   m_InputGateBiasTensor.get(), m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(),
-                                   m_OutputGateBiasTensor.get(), &cellStateInTensor, &outputStateInTensor,
-                                   &cellStateOutTensor, &outputStateOutTensor);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQuantizedLstmWorkload_configure");
+        m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
+                                       m_InputToForgetWeightsTensor.get(),
+                                       m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
+                                       m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+                                       m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+                                       m_InputGateBiasTensor.get(), m_ForgetGateBiasTensor.get(),
+                                       m_CellBiasTensor.get(),
+                                       m_OutputGateBiasTensor.get(), &cellStateInTensor, &outputStateInTensor,
+                                       &cellStateOutTensor, &outputStateOutTensor);
+    }
 
     InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
     InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index 1a7bc64420..b5f10292e5 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -60,11 +60,14 @@ ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, cons
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
                                                                           info.m_InputTensorInfos[0].GetNumDimensions(),
                                                                           m_Data.m_Parameters.m_vAxis);
 
-    m_Layer.configure(&input,
-                      &output,
-                      static_cast<unsigned int>(coords[0]),
-                      ConvertReductionOperationToAcl(m_Data.m_Parameters),
-                      m_Data.m_Parameters.m_KeepDims);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClReduceWorkload_configure");
+        m_Layer.configure(&input,
+                          &output,
+                          static_cast<unsigned int>(coords[0]),
+                          ConvertReductionOperationToAcl(m_Data.m_Parameters),
+                          m_Data.m_Parameters.m_KeepDims);
+    }
 }
 
 void ClReduceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index b9b92a8910..ece3166eea 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -31,7 +31,10 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_Layer.configure(clCompileContext, &input, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClReshapeWorkload_configure");
+        m_Layer.configure(clCompileContext, &input, &output);
+    }
 }
 
 void ClReshapeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 0c2b930039..8121429560 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -73,15 +73,18 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
                                                   ? arm_compute::SamplingPolicy::CENTER
                                                   : arm_compute::SamplingPolicy::TOP_LEFT;
 
-    m_ResizeLayer.configure(clCompileContext,
-                            &input,
-                            &output,
-                            arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
-                                                         arm_compute::BorderMode::REPLICATE,
-                                                         arm_compute::PixelValue(0.f),
-                                                         samplingPolicy,
-                                                         true,
-                                                         descriptor.m_Parameters.m_AlignCorners));
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClResizeWorkload_configure");
+        m_ResizeLayer.configure(clCompileContext,
+                                &input,
+                                &output,
+                                arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
+                                                             arm_compute::BorderMode::REPLICATE,
+                                                             arm_compute::PixelValue(0.f),
+                                                             samplingPolicy,
+                                                             true,
+                                                             descriptor.m_Parameters.m_AlignCorners));
+    }
 };
 
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index 8d48bfad33..b8ae2f6d59 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -33,7 +33,10 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_RsqrtLayer.configure(clCompileContext, &input, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClRsqrtWorkload_configure");
+        m_RsqrtLayer.configure(clCompileContext, &input, &output);
+    }
 }
 
 void ClRsqrtWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSinWorkload.cpp b/src/backends/cl/workloads/ClSinWorkload.cpp
index dcde349d8d..2989ac9691 100644
--- a/src/backends/cl/workloads/ClSinWorkload.cpp
+++ b/src/backends/cl/workloads/ClSinWorkload.cpp
@@ -33,7 +33,10 @@ ClSinWorkload::ClSinWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
     arm_compute::ICLTensor& input  = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
-    m_SinLayer.configure(clCompileContext, &input, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSinWorkload_configure");
+        m_SinLayer.configure(clCompileContext, &input, &output);
+    }
 }
 
 void ClSinWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index 6f3c1a9402..f92bb378dc 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -51,7 +51,10 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
     std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size);
 
-    m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSliceWorkload_configure");
+        m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
+    }
 }
 
 void ClSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 0b7b10d7b0..39684d83c1 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -44,7 +44,10 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
-    m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSoftmaxWorkload_configure");
+        m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+    }
 }
 
 void ClSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 70166192e5..2bdfb38ade 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -76,13 +76,16 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
 
-    m_SpaceToBatchLayer.configure(clCompileContext,
-                                  &input,
-                                  blockWidth,
-                                  blockHeight,
-                                  paddingLeftTop,
-                                  paddingRightBottom,
-                                  &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSpaceToBatchNdWorkload_configure");
+        m_SpaceToBatchLayer.configure(clCompileContext,
+                                      &input,
+                                      blockWidth,
+                                      blockHeight,
+                                      paddingLeftTop,
+                                      paddingRightBottom,
+                                      &output);
+    }
 }
 
 void ClSpaceToBatchNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index 119605a02b..a2c9026f9f 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -39,7 +39,10 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
 
-    m_Layer.configure(clCompileContext, &input, &output, blockSize);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSpaceToDepthWorkload_configure");
+        m_Layer.configure(clCompileContext, &input, &output, blockSize);
+    }
 }
 
 void ClSpaceToDepthWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index b1ab17d6d2..a7d8a1aa7e 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -102,7 +102,10 @@ ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor
     unsigned int aclAxis = CalcAclAxis(descriptor.m_Parameters.GetNumDimensions(), *splitAxis.begin());
     auto layer = std::make_unique<arm_compute::CLSplit>();
 
-    layer->configure(&input, aclOutputs, aclAxis);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSplitterWorkload_configure");
+        layer->configure(&input, aclOutputs, aclAxis);
+    }
 
     // Prepare
     layer->prepare();
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index 5070356dee..75842a2b06 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -66,7 +66,10 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
     m_Layer.reset(new arm_compute::CLStackLayer());
 
     int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions());
-    m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClStackWorkload_configure");
+        m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
+    }
 }
 
 void ClStackWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index 51a77c54ad..b2e73cb684 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -85,15 +85,18 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
     input.info()->set_data_layout(aclDataLayout);
     output.info()->set_data_layout(aclDataLayout);
 
-    m_StridedSliceLayer.configure(clCompileContext,
-                                  &input,
-                                  &output,
-                                  starts,
-                                  ends,
-                                  strides,
-                                  begin_mask,
-                                  end_mask,
-                                  shrink_axis_mask);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClStridedSliceWorkload_configure");
+        m_StridedSliceLayer.configure(clCompileContext,
+                                      &input,
+                                      &output,
+                                      starts,
+                                      ends,
+                                      strides,
+                                      begin_mask,
+                                      end_mask,
+                                      shrink_axis_mask);
+    }
 }
 
 void ClStridedSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index 6465e3e050..797763d381 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -31,7 +31,10 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
-    m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSubtractionWorkload_configure");
+        m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+    }
 }
 
 void ClSubtractionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index c37907e156..9277bb0b19 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -100,7 +100,11 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
     output.info()->set_data_layout(aclDataLayout);
 
     arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
-    m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClTransposeConvolution2dWorkload_configure");
+        m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output,
+                          padStrideInfo);
+    }
 
     InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
 
     if (m_BiasesTensor)
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index d80eae87ea..d52806b9d4 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -42,11 +42,14 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescrip
     const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
 
-    // Run the layer.
-    m_PermuteFunction.configure(clCompileContext,
-                                &input,
-                                &output,
-                                armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
+    {
+        ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClTransposeWorkload_configure");
+        // Run the layer.
+        m_PermuteFunction.configure(clCompileContext,
+                                    &input,
+                                    &output,
+                                    armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
+    }
 }
 
 void ClTransposeWorkload::Execute() const
-- 
cgit v1.2.1
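
Editor's note: the change above is the same mechanical transformation in all 58 files: each Compute Library configure() call is wrapped in an extra brace scope that opens with ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "<Workload>_configure"). That macro creates an RAII object whose constructor starts a timer and whose destructor records the elapsed time when the scope closes, so the measurement covers exactly the configure() call, where ACL does its expensive kernel selection and OpenCL kernel compilation. The following is a minimal sketch of that RAII mechanism for readers unfamiliar with the pattern; ScopedEvent and SCOPED_EVENT are invented names for illustration only and are not Arm NN's actual ARMNN_SCOPED_PROFILING_EVENT implementation, which reports into the Arm NN profiler rather than to stdout.

    // Minimal sketch of an RAII scoped profiling event (assumed names).
    #include <chrono>
    #include <iostream>
    #include <string>
    #include <utility>

    class ScopedEvent
    {
    public:
        explicit ScopedEvent(std::string name)
            : m_Name(std::move(name))
            , m_Start(std::chrono::steady_clock::now())
        {}

        // Runs when the enclosing brace scope closes, i.e. immediately
        // after the configure() call it wraps has returned.
        ~ScopedEvent()
        {
            const auto elapsed = std::chrono::steady_clock::now() - m_Start;
            std::cout << m_Name << ": "
                      << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()
                      << " us\n";
        }

        ScopedEvent(const ScopedEvent&) = delete;
        ScopedEvent& operator=(const ScopedEvent&) = delete;

    private:
        std::string m_Name;
        std::chrono::steady_clock::time_point m_Start;
    };

    // One event per scope suffices here, mirroring one event per configure block.
    #define SCOPED_EVENT(name) ScopedEvent scopedProfilingEvent(name)

    int main()
    {
        {
            SCOPED_EVENT("ClExampleWorkload_configure");
            // Stand-in for layer.configure(...), which may trigger OpenCL
            // kernel selection and compilation and is worth timing on its own.
        }
    }

The extra braces are the design point: they bound the timed region precisely, keeping tensor-handle setup above the block out of the measurement, and the destructor fires even if configure() throws.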