author     Kevin May <kevin.may@arm.com>    2021-10-22 15:42:28 +0100
committer  Kevin May <kevin.may@arm.com>    2021-11-01 09:46:40 +0000
commit     9f6862de94e3d15ea5207a5747012f6c7eead358 (patch)
tree       4b45cab325cd7eb36fd04a1cba7d0e7a3ec92dbb
parent     b1c62f11881e0d528bea5b3664a8f36e4c03b508 (diff)
download   armnn-9f6862de94e3d15ea5207a5747012f6c7eead358.tar.gz
IVGCVSW-6440 Add profiling around CL workload configure calls
Signed-off-by: Kevin May <kevin.may@arm.com>
Change-Id: I7626d5bd82e832d5be6913719a34d76fbd1dbed8
-rw-r--r--  src/backends/cl/workloads/ClAbsWorkload.cpp | 6
-rw-r--r--  src/backends/cl/workloads/ClActivationWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClAdditionWorkload.cpp | 6
-rw-r--r--  src/backends/cl/workloads/ClArgMinMaxWorkload.cpp | 23
-rw-r--r--  src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp | 21
-rw-r--r--  src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClCastWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClChannelShuffleWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClComparisonWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClConcatWorkload.cpp | 9
-rw-r--r--  src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClConvolution3dWorkload.cpp | 16
-rw-r--r--  src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp | 23
-rw-r--r--  src/backends/cl/workloads/ClDequantizeWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClDivisionWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClExpWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClFillWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClFloorFloatWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClFullyConnectedWorkload.cpp | 15
-rw-r--r--  src/backends/cl/workloads/ClGatherWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp | 15
-rw-r--r--  src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp | 6
-rw-r--r--  src/backends/cl/workloads/ClLogWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClLogicalAndWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClLogicalNotWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClLogicalOrWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClLstmFloatWorkload.cpp | 19
-rw-r--r--  src/backends/cl/workloads/ClMaximumWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClMeanWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClMinimumWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClMultiplicationWorkload.cpp | 21
-rw-r--r--  src/backends/cl/workloads/ClNegWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClPadWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClPermuteWorkload.cpp | 7
-rw-r--r--  src/backends/cl/workloads/ClPooling2dWorkload.cpp | 7
-rw-r--r--  src/backends/cl/workloads/ClPreluWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClQLstmWorkload.cpp | 39
-rw-r--r--  src/backends/cl/workloads/ClQuantizeWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp | 20
-rw-r--r--  src/backends/cl/workloads/ClReduceWorkload.cpp | 13
-rw-r--r--  src/backends/cl/workloads/ClReshapeWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClResizeWorkload.cpp | 21
-rw-r--r--  src/backends/cl/workloads/ClRsqrtWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClSinWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClSliceWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClSoftmaxWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp | 17
-rw-r--r--  src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClSplitterWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClStackWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClStridedSliceWorkload.cpp | 21
-rw-r--r--  src/backends/cl/workloads/ClSubtractionWorkload.cpp | 5
-rw-r--r--  src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp | 6
-rw-r--r--  src/backends/cl/workloads/ClTransposeWorkload.cpp | 13
58 files changed, 349 insertions(+), 175 deletions(-)
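
Every hunk below makes the same mechanical change: the Arm Compute Library configure() call in each workload constructor is wrapped in an extra brace scope containing an ARMNN_SCOPED_PROFILING_EVENT, so the time spent configuring each CL layer (which can include OpenCL kernel compilation) is attributed to a named per-workload event. The macro is RAII-style: the event starts where it is declared and stops when the enclosing scope closes, which is why the patch introduces the extra braces. A minimal standalone sketch of the idea follows; this is illustrative code only, not the real ArmNN macro, and ScopedEvent, Configure, and the event name are hypothetical stand-ins:

#include <chrono>
#include <cstdio>

// Stand-in for ARMNN_SCOPED_PROFILING_EVENT: an RAII timer that reports
// the elapsed wall-clock time when its scope closes.
struct ScopedEvent
{
    explicit ScopedEvent(const char* name) : m_Name(name) {}
    ~ScopedEvent()
    {
        const auto us = std::chrono::duration_cast<std::chrono::microseconds>(
            std::chrono::steady_clock::now() - m_Start).count();
        std::printf("%s: %lld us\n", m_Name, static_cast<long long>(us));
    }
    const char* m_Name;
    std::chrono::steady_clock::time_point m_Start = std::chrono::steady_clock::now();
};

// Stands in for an Arm Compute Library configure() call.
void Configure() { /* ... */ }

int main()
{
    {
        ScopedEvent event("ClFooWorkload_configure");
        Configure();
    } // ~ScopedEvent() runs here, so only the configure call is timed
    return 0;
}

Bounding the event with its own scope keeps later work in the constructor, such as the InitializeArmComputeClTensorData calls visible in several hunks below, out of the measured interval.
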
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index fa8e4f737f..eeaec54439 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -33,8 +33,10 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
-
- m_AbsLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClAbsWorkload_configure");
+ m_AbsLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClAbsWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index 20a65b680e..229a291026 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -47,7 +47,10 @@ ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& desc
arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClActivationWorkload_configure");
+ m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
+ }
}
void ClActivationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 9bef0603e1..55957d794a 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -30,8 +30,10 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
-
- m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClAdditionWorkload_configure");
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ }
}
void ClAdditionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 78646a7f86..0bfb4e219b 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -70,17 +70,20 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descrip
auto unsignedAxis = armnnUtils::GetUnsignedAxis(numDims, m_Data.m_Parameters.m_Axis);
int aclAxis = armnn::numeric_cast<int>(CalcAclAxis(numDims, unsignedAxis));
- if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max)
{
- m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX);
- }
- else
- {
- m_ArgMinMaxLayer.configure(clCompileContext,
- &input,
- aclAxis,
- &output,
- arm_compute::ReductionOperation::ARG_IDX_MIN);
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClArgMinMaxWorkload_configure");
+ if (m_Data.m_Parameters.m_Function == ArgMinMaxFunction::Max)
+ {
+ m_ArgMinMaxLayer.configure(&input, aclAxis, &output, arm_compute::ReductionOperation::ARG_IDX_MAX);
+ }
+ else
+ {
+ m_ArgMinMaxLayer.configure(clCompileContext,
+ &input,
+ aclAxis,
+ &output,
+ arm_compute::ReductionOperation::ARG_IDX_MIN);
+ }
}
}
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 8367d7e266..fba1679a29 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -86,15 +86,18 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(clCompileContext,
- &input,
- &output,
- m_Mean.get(),
- m_Variance.get(),
- m_Beta.get(),
- m_Gamma.get(),
- m_Data.m_Parameters.m_Eps,
- activationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClBatchNormalizationFloatWorkload_configure");
+ m_Layer.configure(clCompileContext,
+ &input,
+ &output,
+ m_Mean.get(),
+ m_Variance.get(),
+ m_Beta.get(),
+ m_Gamma.get(),
+ m_Data.m_Parameters.m_Eps,
+ activationInfo);
+ }
InitializeArmComputeClTensorData(*m_Mean, m_Data.m_Mean);
InitializeArmComputeClTensorData(*m_Variance, m_Data.m_Variance);
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index 8eef587644..28b408d0a2 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -42,7 +42,10 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClBatchToSpaceNdWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, blockWidth, blockHeight, &output);
+ }
}
void ClBatchToSpaceNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClCastWorkload.cpp b/src/backends/cl/workloads/ClCastWorkload.cpp
index 07b76dc064..9606385720 100644
--- a/src/backends/cl/workloads/ClCastWorkload.cpp
+++ b/src/backends/cl/workloads/ClCastWorkload.cpp
@@ -35,7 +35,10 @@ ClCastWorkload::ClCastWorkload(const CastQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_CastLayer.configure(clCompileContext, &input, &output, g_AclConvertPolicy);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClCastWorkload_configure");
+ m_CastLayer.configure(clCompileContext, &input, &output, g_AclConvertPolicy);
+ }
}
void ClCastWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp b/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
index 751056a9a0..5d3e66c782 100644
--- a/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
+++ b/src/backends/cl/workloads/ClChannelShuffleWorkload.cpp
@@ -86,7 +86,10 @@ ClChannelShuffleWorkload::ClChannelShuffleWorkload(const ChannelShuffleQueueDesc
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_ChannelShuffleLayer.configure(clCompileContext, &input, &output, descriptor.m_Parameters.m_NumGroups);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClChannelShuffleWorkload_configure");
+ m_ChannelShuffleLayer.configure(clCompileContext, &input, &output, descriptor.m_Parameters.m_NumGroups);
+ }
}
void ClChannelShuffleWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index d83682d81b..3d59e08475 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -58,7 +58,10 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc
const arm_compute::ComparisonOperation comparisonOperation = ConvertComparisonOperationToAcl(m_Data.m_Parameters);
- m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClComparisonWorkload_configure");
+ m_ComparisonLayer.configure(clCompileContext, &input0, &input1, &output, comparisonOperation);
+ }
}
void ClComparisonWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index 233fd19542..58983c8896 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -88,9 +88,12 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
// Create the layer function
auto layer = std::make_unique<arm_compute::CLConcatenateLayer>();
- // Configure input and output tensors
- size_t aclAxis = CalcAxis(descriptor.m_Parameters);
- layer->configure(clCompileContext, aclInputs, &output, aclAxis);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConcatWorkload_configure");
+ // Configure input and output tensors
+ size_t aclAxis = CalcAxis(descriptor.m_Parameters);
+ layer->configure(clCompileContext, aclInputs, &output, aclAxis);
+ }
// Prepare
layer->prepare();
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index 455ec1af13..ccea7c84b8 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -25,7 +25,10 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp16ToFp32Workload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+ }
}
void ClConvertFp16ToFp32Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index 8e6b0cea67..9b38b22019 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -25,7 +25,10 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp32ToFp16Workload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0);
+ }
}
void ClConvertFp32ToFp16Workload::Execute() const
diff --git a/src/backends/cl/workloads/ClConvolution3dWorkload.cpp b/src/backends/cl/workloads/ClConvolution3dWorkload.cpp
index 18a2c31b51..baa2f05909 100644
--- a/src/backends/cl/workloads/ClConvolution3dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution3dWorkload.cpp
@@ -83,13 +83,15 @@ ClConvolution3dWorkload::ClConvolution3dWorkload(const Convolution3dQueueDescrip
const arm_compute::Conv3dInfo aclConv3DInfo = ComputeConv3DInfo(descriptor,
isFastMathEnabled);
- m_ConvolutionLayer.configure(clCompileContext,
- &input,
- &weights,
- biasesPtr,
- &output,
- aclConv3DInfo);
-
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution3dWorkload_configure");
+ m_ConvolutionLayer.configure(clCompileContext,
+ &input,
+ &weights,
+ biasesPtr,
+ &output,
+ aclConv3DInfo);
+ }
// Add details for profiling output
WorkloadInfo detailsInfo;
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index aeab0293c1..75a87c7000 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -61,7 +61,10 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor
PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(clCompileContext, &input, &output, blockSize);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthToSpaceWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
+ }
}
void ClDepthToSpaceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9592b37f9d..91c0018c93 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -135,17 +135,20 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
m_DepthwiseConvolutionLayer = std::make_unique<arm_compute::CLDepthwiseConvolutionLayer>();
- static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
- clCompileContext,
- &input,
- m_KernelTensor.get(),
- m_BiasTensor.get(),
- &output,
- padStrideInfo,
- depthMultiplier,
- activationInfo,
- aclDilationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDepthwiseConvolutionWorkload_configure");
+ static_cast<arm_compute::CLDepthwiseConvolutionLayer*>(m_DepthwiseConvolutionLayer.get())->configure(
+ clCompileContext,
+ &input,
+ m_KernelTensor.get(),
+ m_BiasTensor.get(),
+ &output,
+ padStrideInfo,
+ depthMultiplier,
+ activationInfo,
+ aclDilationInfo);
+ }
ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
ScopedTensorHandle weightsPermutedHandle(weightPermuted);
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index 6bdeaa8fec..00d849c603 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -41,7 +41,10 @@ ClDequantizeWorkload::ClDequantizeWorkload(const DequantizeQueueDescriptor& desc
m_Data.m_Outputs[0])->GetTensor();
m_Layer.reset(new arm_compute::CLDequantizationLayer());
- m_Layer->configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDequantizeWorkload_configure");
+ m_Layer->configure(clCompileContext, &input, &output);
+ }
m_Layer->prepare();
}
diff --git a/src/backends/cl/workloads/ClDivisionWorkload.cpp b/src/backends/cl/workloads/ClDivisionWorkload.cpp
index d444a192cb..5df4c61bf5 100644
--- a/src/backends/cl/workloads/ClDivisionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionWorkload.cpp
@@ -44,7 +44,10 @@ ClDivisionWorkload::ClDivisionWorkload(const DivisionQueueDescriptor& descriptor
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClDivisionWorkload_configure");
+ m_ArithmeticDivision.configure(clCompileContext, &input0, &input1, &output, activationInfo);
+ }
}
void ClDivisionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index 9c1f0368a3..eeb6637705 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -39,7 +39,10 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_ExpLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClExpWorkload_configure");
+ m_ExpLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClExpWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index ea42dcfc8b..2f95bc564c 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -31,7 +31,10 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
arm_compute::PixelValue pixelValue = GetPixelValue(output.info(), descriptor.m_Parameters.m_Value);
- m_Layer.configure(clCompileContext, &output, pixelValue);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFillWorkload_configure");
+ m_Layer.configure(clCompileContext, &output, pixelValue);
+ }
}
void ClFillWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index d2b487169e..5db8cc6a7d 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -30,7 +30,10 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFloorFloatWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output);
+ }
}
void ClFloorFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index a0889e1b60..cc4ce9082c 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -88,12 +88,15 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
arm_compute::FullyConnectedLayerInfo fc_info =
ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
- m_FullyConnectedLayer.configure(clCompileContext,
- &input,
- m_WeightsTensor.get(),
- m_BiasesTensor.get(),
- &output,
- fc_info);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFullyConnectedWorkload_configure");
+ m_FullyConnectedLayer.configure(clCompileContext,
+ &input,
+ m_WeightsTensor.get(),
+ m_BiasesTensor.get(),
+ &output,
+ fc_info);
+ }
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index 7c8d1ab787..b2341b8f32 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -45,7 +45,10 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
int aclAxis = ComputeAclAxis(descriptor.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClGatherWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &indices, &output, aclAxis);
+ }
};
void ClGatherWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index a4f20c5b6c..58e65ddab7 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -50,12 +50,15 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(clCompileContext,
- &input,
- &output,
- descriptor.m_Parameters.m_Gamma,
- descriptor.m_Parameters.m_Beta,
- descriptor.m_Parameters.m_Eps);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClInstanceNormalizationWorkload_configure");
+ m_Layer.configure(clCompileContext,
+ &input,
+ &output,
+ descriptor.m_Parameters.m_Gamma,
+ descriptor.m_Parameters.m_Beta,
+ descriptor.m_Parameters.m_Eps);
+ }
};
void ClInstanceNormalizationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index 953ff4aa9f..3b20ace1ed 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -48,7 +48,10 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza
int axis = (m_Data.m_Parameters.m_DataLayout == DataLayout::NCHW) ? 2 : 0;
- m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClL2NormalizationFloatWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, axis, m_Data.m_Parameters.m_Eps);
+ }
}
void ClL2NormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 6c032111db..b75c6b0266 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -44,7 +44,11 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogSoftmaxWorkload_configure");
+ m_LogSoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ }
}
void ClLogSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogWorkload.cpp b/src/backends/cl/workloads/ClLogWorkload.cpp
index 180c0afd00..d13a0eaa3f 100644
--- a/src/backends/cl/workloads/ClLogWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogWorkload.cpp
@@ -33,7 +33,10 @@ ClLogWorkload::ClLogWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogWorkload_configure");
+ m_LogLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClLogWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index 30a187be8a..481d87c4ff 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -48,7 +48,10 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalAndWorkload_configure");
+ m_LogicalAndLayer.configure(clCompileContext, &input0, &input1, &output);
+ }
}
void ClLogicalAndWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index 4e95fcd266..c61f8443b7 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -44,7 +44,10 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalNotLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalNotWorkload_configure");
+ m_LogicalNotLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClLogicalNotWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index b4eb11cb4d..307af2086a 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -48,7 +48,10 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des
arm_compute::ICLTensor& input1 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLogicalOrWorkload_configure");
+ m_LogicalOrLayer.configure(clCompileContext, &input0, &input1, &output);
+ }
}
void ClLogicalOrWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index 709b14528e..9cbbff3dd9 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -193,14 +193,17 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
throw armnn::Exception("Wrong Type of Activation Function!");
}
- m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(),
- m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
- m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(),
- m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(),
- m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in,
- &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
- &cell_state_out, &output, lstm_param, activationLayerInfo,
- cell_threshold, projection_threshold);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClLstmFloatWorkload_configure");
+ m_LstmLayer.configure(clCompileContext, &input, m_InputToForgetWeightsTensor.get(),
+ m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
+ m_RecurrentToForgetWeightsTensor.get(), m_RecurrentToCellWeightsTensor.get(),
+ m_RecurrentToOutputWeightsTensor.get(), m_ForgetGateBiasTensor.get(),
+ m_CellBiasTensor.get(), m_OutputGateBiasTensor.get(), &output_state_in,
+ &cell_state_in, m_ScratchBuffer.get(), &output_state_out,
+ &cell_state_out, &output, lstm_param, activationLayerInfo,
+ cell_threshold, projection_threshold);
+ }
armcomputetensorutils::InitialiseArmComputeTensorEmpty(*m_ScratchBuffer);
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index 5a19c6949c..f10c609ef9 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -47,7 +47,10 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMaximumWorkload_configure");
+ m_MaximumLayer.configure(clCompileContext, &input0, &input1, &output);
+ }
}
void ClMaximumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index cd79d04612..074b4b2061 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -47,7 +47,10 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
info.m_InputTensorInfos[0].GetNumDimensions(),
m_Data.m_Parameters.m_Axis);
- m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMeanWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, coords, m_Data.m_Parameters.m_KeepDims, &output);
+ }
}
void ClMeanWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 22e928763d..d29dcc2950 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -47,7 +47,10 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
arm_compute::ICLTensor& input1 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMinimumWorkload_configure");
+ m_MinimumLayer.configure(clCompileContext, &input0, &input1, &output);
+ }
}
void ClMinimumWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index b0b71ce3f5..e19a7a24b6 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -62,15 +62,18 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- // Construct
- m_PixelWiseMultiplication.configure(clCompileContext,
- &input0,
- &input1,
- &output,
- 1.0f,
- convertPolicy,
- arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
- activationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClMultiplicationWorkload_configure");
+ // Construct
+ m_PixelWiseMultiplication.configure(clCompileContext,
+ &input0,
+ &input1,
+ &output,
+ 1.0f,
+ convertPolicy,
+ arm_compute::RoundingPolicy::TO_NEAREST_EVEN,
+ activationInfo);
+ }
}
void ClMultiplicationWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index fb5b040dec..c606189e83 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -33,7 +33,10 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_NegLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClNegWorkload_configure");
+ m_NegLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClNegWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index 9c6e0a1e97..9234a8a88b 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -50,7 +50,10 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu
arm_compute::NormalizationLayerInfo normalizationInfo = BuildArmComputeNormalizationLayerInfo(m_Data.m_Parameters);
- m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClNormalizationFloatWorkload_configure");
+ m_NormalizationLayer.configure(clCompileContext, &input, &output, normalizationInfo);
+ }
};
void ClNormalizationFloatWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index 46975102db..48d61b0b8a 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -41,7 +41,10 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor,
arm_compute::PixelValue pixelValue = GetPixelValue(input.info(), descriptor.m_Parameters.m_PadValue);
- m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPadWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, padList, pixelValue);
+ }
}
void ClPadWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
index c7efe7a7ed..641e871d50 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.cpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -45,8 +45,11 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
- // Run the layer.
- m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings));
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPermuteWorkload_configure");
+ // Run the layer.
+ m_PermuteFunction.configure(clCompileContext, &input, &output, BuildArmComputePermutationVector(mappings));
+ }
}
void ClPermuteWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
index ff441ef915..f967c6dd39 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
@@ -55,8 +55,11 @@ ClPooling2dWorkload::ClPooling2dWorkload(
arm_compute::PoolingLayerInfo layerInfo = BuildArmComputePoolingLayerInfo(m_Data.m_Parameters, fpMixedPrecision);
- // Run the layer.
- m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPooling2dWorkload_configure");
+ // Run the layer.
+ m_PoolingLayer.configure(clCompileContext, &input, &output, layerInfo);
+ }
}
void ClPooling2dWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index beb9e43573..449e4de48c 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -37,7 +37,10 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
arm_compute::ICLTensor& alpha = static_cast<IClTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClPreluWorkload_configure");
+ m_PreluLayer.configure(clCompileContext, &input, &alpha, &output);
+ }
}
void ClPreluWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index d7c7af7e10..b2c1d6d63b 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -158,24 +158,27 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
m_Data.m_Parameters.m_CellIntermediateScale,
m_Data.m_Parameters.m_OutputIntermediateScale);
- // QLSTM CL configure
- m_QLstmLayer.configure(clCompileContext,
- &input,
- m_InputToForgetWeightsTensor.get(),
- m_InputToCellWeightsTensor.get(),
- m_InputToOutputWeightsTensor.get(),
- m_RecurrentToForgetWeightsTensor.get(),
- m_RecurrentToCellWeightsTensor.get(),
- m_RecurrentToOutputWeightsTensor.get(),
- m_ForgetGateBiasTensor.get(),
- m_CellBiasTensor.get(),
- m_OutputGateBiasTensor.get(),
- &cellStateIn,
- &outputStateIn,
- &cellStateOut,
- &outputStateOut,
- &output,
- qLstmParams);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQLstmWorkload_configure");
+ // QLSTM CL configure
+ m_QLstmLayer.configure(clCompileContext,
+ &input,
+ m_InputToForgetWeightsTensor.get(),
+ m_InputToCellWeightsTensor.get(),
+ m_InputToOutputWeightsTensor.get(),
+ m_RecurrentToForgetWeightsTensor.get(),
+ m_RecurrentToCellWeightsTensor.get(),
+ m_RecurrentToOutputWeightsTensor.get(),
+ m_ForgetGateBiasTensor.get(),
+ m_CellBiasTensor.get(),
+ m_OutputGateBiasTensor.get(),
+ &cellStateIn,
+ &outputStateIn,
+ &cellStateOut,
+ &outputStateOut,
+ &output,
+ qLstmParams);
+ }
// Initialise ACL tensor data for mandatory params
InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index dc668fd6b4..5321e6292a 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -39,7 +39,10 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQuantizeWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output);
+ }
}
void ClQuantizeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index 7bacf70a6a..05ae89c93b 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -109,14 +109,18 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip
arm_compute::ICLTensor& cellStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
arm_compute::ICLTensor& outputStateOutTensor = static_cast<IClTensorHandle*>(m_Data.m_Outputs[1])->GetTensor();
- m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
- m_InputToForgetWeightsTensor.get(),
- m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
- m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
- m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
- m_InputGateBiasTensor.get(), m_ForgetGateBiasTensor.get(), m_CellBiasTensor.get(),
- m_OutputGateBiasTensor.get(), &cellStateInTensor, &outputStateInTensor,
- &cellStateOutTensor, &outputStateOutTensor);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClQuantizedLstmWorkload_configure");
+ m_QuantizedLstmLayer.configure(clCompileContext, &inputTensor, m_InputToInputWeightsTensor.get(),
+ m_InputToForgetWeightsTensor.get(),
+ m_InputToCellWeightsTensor.get(), m_InputToOutputWeightsTensor.get(),
+ m_RecurrentToInputWeightsTensor.get(), m_RecurrentToForgetWeightsTensor.get(),
+ m_RecurrentToCellWeightsTensor.get(), m_RecurrentToOutputWeightsTensor.get(),
+ m_InputGateBiasTensor.get(), m_ForgetGateBiasTensor.get(),
+ m_CellBiasTensor.get(),
+ m_OutputGateBiasTensor.get(), &cellStateInTensor, &outputStateInTensor,
+ &cellStateOutTensor, &outputStateOutTensor);
+ }
InitializeArmComputeClTensorData(*m_InputToInputWeightsTensor, m_Data.m_InputToInputWeights);
InitializeArmComputeClTensorData(*m_InputToForgetWeightsTensor, m_Data.m_InputToForgetWeights);
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index 1a7bc64420..b5f10292e5 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -60,11 +60,14 @@ ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, cons
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
info.m_InputTensorInfos[0].GetNumDimensions(),
m_Data.m_Parameters.m_vAxis);
- m_Layer.configure(&input,
- &output,
- static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(m_Data.m_Parameters),
- m_Data.m_Parameters.m_KeepDims);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClReduceWorkload_configure");
+ m_Layer.configure(&input,
+ &output,
+ static_cast<unsigned int>(coords[0]),
+ ConvertReductionOperationToAcl(m_Data.m_Parameters),
+ m_Data.m_Parameters.m_KeepDims);
+ }
}
void ClReduceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index b9b92a8910..ece3166eea 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -31,7 +31,10 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_Layer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClReshapeWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output);
+ }
}
void ClReshapeWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 0c2b930039..8121429560 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -73,15 +73,18 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
? arm_compute::SamplingPolicy::CENTER
: arm_compute::SamplingPolicy::TOP_LEFT;
- m_ResizeLayer.configure(clCompileContext,
- &input,
- &output,
- arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
- arm_compute::BorderMode::REPLICATE,
- arm_compute::PixelValue(0.f),
- samplingPolicy,
- true,
- descriptor.m_Parameters.m_AlignCorners));
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClResizeWorkload_configure");
+ m_ResizeLayer.configure(clCompileContext,
+ &input,
+ &output,
+ arm_compute::ScaleKernelInfo(aclInterpolationPolicy,
+ arm_compute::BorderMode::REPLICATE,
+ arm_compute::PixelValue(0.f),
+ samplingPolicy,
+ true,
+ descriptor.m_Parameters.m_AlignCorners));
+ }
};
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index 8d48bfad33..b8ae2f6d59 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -33,7 +33,10 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_RsqrtLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClRsqrtWorkload_configure");
+ m_RsqrtLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClRsqrtWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSinWorkload.cpp b/src/backends/cl/workloads/ClSinWorkload.cpp
index dcde349d8d..2989ac9691 100644
--- a/src/backends/cl/workloads/ClSinWorkload.cpp
+++ b/src/backends/cl/workloads/ClSinWorkload.cpp
@@ -33,7 +33,10 @@ ClSinWorkload::ClSinWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
- m_SinLayer.configure(clCompileContext, &input, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSinWorkload_configure");
+ m_SinLayer.configure(clCompileContext, &input, &output);
+ }
}
void ClSinWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index 6f3c1a9402..f92bb378dc 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -51,7 +51,10 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
std::tie(starts, ends) = SetClSliceData(m_Data.m_Parameters.m_Begin, m_Data.m_Parameters.m_Size);
- m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSliceWorkload_configure");
+ m_SliceFunction.configure(clCompileContext, &input, &output, starts, ends);
+ }
}
void ClSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 0b7b10d7b0..39684d83c1 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -44,7 +44,10 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
int aclAxis = ComputeAclAxis(m_Data.m_Parameters.m_Axis, info.m_InputTensorInfos[0]);
- m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSoftmaxWorkload_configure");
+ m_SoftmaxLayer.configure(clCompileContext, &input, &output, m_Data.m_Parameters.m_Beta, aclAxis);
+ }
}
void ClSoftmaxWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 70166192e5..2bdfb38ade 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -76,13 +76,16 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_SpaceToBatchLayer.configure(clCompileContext,
- &input,
- blockWidth,
- blockHeight,
- paddingLeftTop,
- paddingRightBottom,
- &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSpaceToBatchNdWorkload_configure");
+ m_SpaceToBatchLayer.configure(clCompileContext,
+ &input,
+ blockWidth,
+ blockHeight,
+ paddingLeftTop,
+ paddingRightBottom,
+ &output);
+ }
}
void ClSpaceToBatchNdWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index 119605a02b..a2c9026f9f 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -39,7 +39,10 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
- m_Layer.configure(clCompileContext, &input, &output, blockSize);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSpaceToDepthWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, &output, blockSize);
+ }
}
void ClSpaceToDepthWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index b1ab17d6d2..a7d8a1aa7e 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -102,7 +102,10 @@ ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor
unsigned int aclAxis = CalcAclAxis(descriptor.m_Parameters.GetNumDimensions(), *splitAxis.begin());
auto layer = std::make_unique<arm_compute::CLSplit>();
- layer->configure(&input, aclOutputs, aclAxis);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSplitterWorkload_configure");
+ layer->configure(&input, aclOutputs, aclAxis);
+ }
// Prepare
layer->prepare();
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index 5070356dee..75842a2b06 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -66,7 +66,10 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
m_Layer.reset(new arm_compute::CLStackLayer());
int aclAxis = CalcAxis(descriptor.m_Parameters.m_Axis, descriptor.m_Parameters.m_InputShape.GetNumDimensions());
- m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClStackWorkload_configure");
+ m_Layer->configure(clCompileContext, aclInputs, aclAxis, &output);
+ }
}
void ClStackWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index 51a77c54ad..b2e73cb684 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -85,15 +85,18 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
input.info()->set_data_layout(aclDataLayout);
output.info()->set_data_layout(aclDataLayout);
- m_StridedSliceLayer.configure(clCompileContext,
- &input,
- &output,
- starts,
- ends,
- strides,
- begin_mask,
- end_mask,
- shrink_axis_mask);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClStridedSliceWorkload_configure");
+ m_StridedSliceLayer.configure(clCompileContext,
+ &input,
+ &output,
+ starts,
+ ends,
+ strides,
+ begin_mask,
+ end_mask,
+ shrink_axis_mask);
+ }
}
void ClStridedSliceWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index 6465e3e050..797763d381 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -31,7 +31,10 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClSubtractionWorkload_configure");
+ m_Layer.configure(clCompileContext, &input0, &input1, &output, g_AclConvertPolicy, activationInfo);
+ }
}
void ClSubtractionWorkload::Execute() const
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index c37907e156..9277bb0b19 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -100,7 +100,11 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
output.info()->set_data_layout(aclDataLayout);
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
- m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, padStrideInfo);
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClTransposeConvolution2dWorkload_configure");
+ m_Layer.configure(clCompileContext, &input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output,
+ padStrideInfo);
+ }
InitializeArmComputeClTensorData(*m_WeightsTensor, m_Data.m_Weight);
if (m_BiasesTensor)
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index d80eae87ea..d52806b9d4 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -42,11 +42,14 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descrip
const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const armnn::PermutationVector& mappings = m_Data.m_Parameters.m_DimMappings;
- // Run the layer.
- m_PermuteFunction.configure(clCompileContext,
- &input,
- &output,
- armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
+ {
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClTransposeWorkload_configure");
+ // Run the layer.
+ m_PermuteFunction.configure(clCompileContext,
+ &input,
+ &output,
+ armcomputetensorutils::BuildArmComputeTransposeVector(mappings));
+ }
}
void ClTransposeWorkload::Execute() const