author     Keith Davis <keith.davis@arm.com>        2021-08-05 14:20:33 +0100
committer  Matthew Sloyan <matthew.sloyan@arm.com>  2021-08-10 13:51:31 +0000
commit     bcd860a30eba22bb2ba0943ad705734ce0ec5b23 (patch)
tree       a54dc121df65036f1604389684c76cdd2c988a4a
parent     2d0679f33f75c43e7169fe0f0ee2d15d0620e091 (diff)
download   armnn-bcd860a30eba22bb2ba0943ad705734ce0ec5b23.tar.gz
IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in CL
Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: I92dd410da7ad633a46d025fdc2b26093041c439b
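Every hunk below instantiates the same two-part pattern; here is a condensed sketch using ClExpWorkload, with the macro invocations taken verbatim from the hunks that follow. (Workloads carrying weights, such as ClFullyConnectedWorkload and ClDepthwiseConvolutionWorkload, first assemble a local detailsInfo WorkloadInfo holding the weights and optional bias TensorInfos and pass that in place of info.)

    ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
                                 const WorkloadInfo& info,
                                 const arm_compute::CLCompileContext& clCompileContext)
        : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
    {
        // Constructor hook: report the workload's static details (descriptor
        // parameters, tensor infos) once, keyed by the workload's GUID.
        ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClExpWorkload_Construct",
                                             descriptor.m_Parameters,
                                             info,
                                             this->GetGuid());
        // ... existing validation and configure() logic is unchanged ...
    }

    void ClExpWorkload::Execute() const
    {
        // Execute hook: the plain scoped CL event becomes the _GUID variant, so
        // each runtime event carries the same GUID as the construction report.
        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExpWorkload_Execute", this->GetGuid());
        RunClFunction(m_ExpLayer, CHECK_LOCATION());
    }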
-rw-r--r-- src/backends/cl/workloads/ClAbsWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClActivationWorkload.cpp | 10
-rw-r--r-- src/backends/cl/workloads/ClAdditionWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClArgMinMaxWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp | 24
-rw-r--r-- src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp | 25
-rw-r--r-- src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClCastWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClComparisonWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClConcatWorkload.cpp | 12
-rw-r--r-- src/backends/cl/workloads/ClConstantWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClConvolution2dWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp | 20
-rw-r--r-- src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp | 25
-rw-r--r-- src/backends/cl/workloads/ClDequantizeWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClDivisionWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClExpWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClFillWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClFloorFloatWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClFullyConnectedWorkload.cpp | 30
-rw-r--r-- src/backends/cl/workloads/ClGatherWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClLogWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClLogicalAndWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClLogicalNotWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClLogicalOrWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClLstmFloatWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClMaximumWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClMeanWorkload.cpp | 13
-rw-r--r-- src/backends/cl/workloads/ClMeanWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClMinimumWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClMultiplicationWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClNegWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClPadWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClPermuteWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClPooling2dWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClPreluWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClQLstmWorkload.cpp | 7
-rw-r--r-- src/backends/cl/workloads/ClQuantizeWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClReduceWorkload.cpp | 22
-rw-r--r-- src/backends/cl/workloads/ClReduceWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClReshapeWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClResizeWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClRsqrtWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClSinWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClSliceWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClSoftmaxWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp | 19
-rw-r--r-- src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp | 2
-rw-r--r-- src/backends/cl/workloads/ClSplitterWorkload.cpp | 7
-rw-r--r-- src/backends/cl/workloads/ClStackWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClStridedSliceWorkload.cpp | 8
-rw-r--r-- src/backends/cl/workloads/ClSubtractionWorkload.cpp | 2
-rw-r--r-- src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp | 19
-rw-r--r-- src/backends/cl/workloads/ClTransposeWorkload.cpp | 8
64 files changed, 360 insertions, 113 deletions
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 4682c646d1..fa8e4f737f 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -39,7 +39,7 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
void ClAbsWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClAbsWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAbsWorkload_Execute", this->GetGuid());
RunClFunction(m_AbsLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index e2f64a9d7e..20a65b680e 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -34,19 +34,25 @@ ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& desc
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClActivationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1);
const arm_compute::ActivationLayerInfo activationLayerInfo =
ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
- arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<ClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
}
void ClActivationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClActivationWorkload_Execute", this->GetGuid());
RunClFunction(m_ActivationLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 4793cc6f8f..9bef0603e1 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -36,7 +36,7 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor
void ClAdditionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAdditionWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 7475cfa315..78646a7f86 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -57,6 +57,12 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descrip
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClArgMinMaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
@@ -80,7 +86,7 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descrip
void ClArgMinMaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClArgMinMaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClArgMinMaxWorkload_Execute", this->GetGuid());
RunClFunction(m_ArgMinMaxLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 361d6f87a5..8367d7e266 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -22,21 +22,21 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& desc,
+ const BatchNormalizationDescriptor& descriptor,
const ActivationDescriptor* activationDescriptor)
{
const arm_compute::TensorInfo aclInputInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(output, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclMeanInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(mean, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclVarInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(var, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclBetaInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(beta, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclGammaInfo =
- armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout);
+ armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
activationDescriptor);
@@ -47,7 +47,7 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
&aclVarInfo,
&aclBetaInfo,
&aclGammaInfo,
- desc.m_Eps,
+ descriptor.m_Eps,
activationInfo);
}
@@ -57,6 +57,12 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Mean = std::make_unique<arm_compute::CLTensor>();
BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
@@ -103,7 +109,7 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
void ClBatchNormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchNormalizationFloatWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index c9f1f7f295..0ba2d97e8f 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -19,7 +19,7 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
const TensorInfo& var,
const TensorInfo& beta,
const TensorInfo& gamma,
- const BatchNormalizationDescriptor& desc,
+ const BatchNormalizationDescriptor& descriptor,
const ActivationDescriptor* activationDescriptor = nullptr);
class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index b9736db642..8eef587644 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -17,11 +17,17 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info,
const arm_compute::CLCompileContext& clCompileContext)
- : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchToSpaceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -30,8 +36,8 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc
input.info()->set_data_layout(aclDataLayout);
// ArmNN blockShape is [H, W]; Cl asks for [W, H]
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[1]);
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
@@ -41,19 +47,20 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDesc
void ClBatchToSpaceNdWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchToSpaceNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchToSpaceNdWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const BatchToSpaceNdDescriptor& desc) {
- DataLayout dataLayout = desc.m_DataLayout;
+ const BatchToSpaceNdDescriptor& descriptor)
+{
+ DataLayout dataLayout = descriptor.m_DataLayout;
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
// ArmNN blockShape is [H, W]; Cl asks for [W, H]
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
index 2262f33c73..7ef8300f89 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -13,7 +13,7 @@ namespace armnn
arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const BatchToSpaceNdDescriptor& desc);
+ const BatchToSpaceNdDescriptor& descriptor);
class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClCastWorkload.cpp b/src/backends/cl/workloads/ClCastWorkload.cpp
index e995e42386..07b76dc064 100644
--- a/src/backends/cl/workloads/ClCastWorkload.cpp
+++ b/src/backends/cl/workloads/ClCastWorkload.cpp
@@ -40,7 +40,7 @@ ClCastWorkload::ClCastWorkload(const CastQueueDescriptor& descriptor,
void ClCastWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClCastWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClCastWorkload_Execute", this->GetGuid());
RunClFunction(m_CastLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index 35e6d68733..d83682d81b 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -44,6 +44,12 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonComparisonWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1);
arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -57,7 +63,7 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& desc
void ClComparisonWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClComparisonWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClComparisonWorkload_Execute", this->GetGuid());
RunClFunction(m_ComparisonLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index 1c2d476e0c..233fd19542 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -18,9 +18,9 @@ using namespace armcomputetensorutils;
namespace
{
-size_t CalcAxis(const OriginsDescriptor& desc)
+size_t CalcAxis(const OriginsDescriptor& descriptor)
{
- return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+ return (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
}
} //namespace
@@ -50,6 +50,12 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConcatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
bool allInputsAreSubtensors = true;
// Check that all inputs are sub-tensors
@@ -95,7 +101,7 @@ void ClConcatWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClConcatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConcatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp
index 60dcd59268..1ff7504058 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.cpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.cpp
@@ -51,7 +51,7 @@ ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor
void ClConstantWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConstantWorkload_Execute", this->GetGuid());
// The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
// on the first inference, then reused for subsequent inferences.
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index aaffd83741..455ec1af13 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -30,7 +30,7 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
void ClConvertFp16ToFp32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp16ToFp32Workload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index a9f1d91bcf..8e6b0cea67 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -30,7 +30,7 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
void ClConvertFp32ToFp16Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp32ToFp16Workload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index ab9d5bcbd2..12a47dcd94 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -132,7 +132,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip
}
// Report Profiling Details
- ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Execute_Guid",
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Construct",
descriptor.m_Parameters,
detailsInfo,
this->GetGuid());
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index d42b261a10..aeab0293c1 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -21,12 +21,12 @@ using namespace armcomputetensorutils;
arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const DepthToSpaceDescriptor& desc)
+ const DepthToSpaceDescriptor& descriptor)
{
- DataLayout dataLayout = desc.m_DataLayout;
+ DataLayout dataLayout = descriptor.m_DataLayout;
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
@@ -36,11 +36,17 @@ arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
return aclStatus;
}
-ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
+ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
const WorkloadInfo& info,
const arm_compute::CLCompileContext& clCompileContext)
- : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
+ : BaseWorkload<DepthToSpaceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthToSpaceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -49,7 +55,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor
PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ICLTensor& output =
PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
@@ -60,7 +66,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor
void ClDepthToSpaceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthToSpaceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthToSpaceWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
index 6cb8bb5e9e..01f83331c5 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
@@ -16,7 +16,7 @@ namespace armnn
arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const DepthToSpaceDescriptor& desc);
+ const DepthToSpaceDescriptor& descriptor);
class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a9977bd54..9592b37f9d 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -78,15 +78,32 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
{
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthwiseConvolutionWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
// ArmNN's weight format is usually [ M, I, H, W ] but for depthwise it's [ 1, H, W, I*M ]
// Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
ConstTensor weightPermuted;
unsigned int depthMultiplier;
std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
std::tie(weightPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
- info.m_InputTensorInfos[0],
- m_Data.m_Parameters.m_DataLayout,
- permuteBuffer.get());
+ info.m_InputTensorInfos[0],
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
// Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
@@ -151,7 +168,7 @@ void ClDepthwiseConvolutionWorkload::FreeUnusedTensors()
void ClDepthwiseConvolutionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthwiseConvolutionWorkload_Execute", this->GetGuid());
ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index 42cc579a8c..6bdeaa8fec 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -49,7 +49,7 @@ void ClDequantizeWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClDequantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDequantizeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/cl/workloads/ClDivisionWorkload.cpp b/src/backends/cl/workloads/ClDivisionWorkload.cpp
index 76220a1b64..d444a192cb 100644
--- a/src/backends/cl/workloads/ClDivisionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionWorkload.cpp
@@ -49,7 +49,7 @@ ClDivisionWorkload::ClDivisionWorkload(const DivisionQueueDescriptor& descriptor
void ClDivisionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDivisionWorkload_Execute", this->GetGuid());
RunClFunction(m_ArithmeticDivision, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index 60c383f8bf..9c1f0368a3 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -28,6 +28,12 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClExpWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1);
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -38,7 +44,7 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
void ClExpWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClExpWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExpWorkload_Execute", this->GetGuid());
RunClFunction(m_ExpLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index a2204fa42d..8cb2db4b25 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -20,6 +20,12 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<FillQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFillWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1);
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
@@ -30,7 +36,7 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
void ClFillWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClFillWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFillWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index 3915270c24..d2b487169e 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -35,7 +35,7 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto
void ClFloorFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFloorFloatWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index d1d911ac13..a0889e1b60 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -28,10 +28,10 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
arm_compute::TensorInfo aclBiases;
- arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
- aclBiases = BuildArmComputeTensorInfo(biases);
+ aclBiases = BuildArmComputeTensorInfo(biases);
optionalAclBiases = &aclBiases;
}
@@ -50,9 +50,25 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
const arm_compute::CLCompileContext& clCompileContext)
- : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
- , m_FullyConnectedLayer(memoryManager)
+ : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info), m_FullyConnectedLayer(memoryManager)
{
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFullyConnectedWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
@@ -64,13 +80,13 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
- arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
m_FullyConnectedLayer.configure(clCompileContext,
&input,
@@ -94,7 +110,7 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
void ClFullyConnectedWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFullyConnectedWorkload_Execute", this->GetGuid());
RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index 98dfe7bc81..7c8d1ab787 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -31,6 +31,12 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<GatherQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClGatherWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
void ClGatherWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClGatherWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClGatherWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
} // namespace armnn
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index a0e8e7b87d..a4f20c5b6c 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -35,6 +35,12 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClInstanceNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -54,7 +60,7 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
void ClInstanceNormalizationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClInstanceNormalizationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClInstanceNormalizationWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index 984f21a4db..953ff4aa9f 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -31,6 +31,12 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza
const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClL2NormalizationFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2Normaliza
void ClL2NormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClL2NormalizationFloatWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 1a255f13f6..6c032111db 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc
: BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
, m_LogSoftmaxLayer(memoryManager)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClLogSoftmaxWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& desc
void ClLogSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogSoftmaxWorkload_Execute", this->GetGuid());
RunClFunction(m_LogSoftmaxLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClLogWorkload.cpp b/src/backends/cl/workloads/ClLogWorkload.cpp
index b35345f1ce..180c0afd00 100644
--- a/src/backends/cl/workloads/ClLogWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogWorkload.cpp
@@ -38,7 +38,7 @@ ClLogWorkload::ClLogWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
void ClLogWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogWorkload_Execute", this->GetGuid());
RunClFunction(m_LogLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index f04cede2f8..30a187be8a 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -36,6 +36,12 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalAndWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1);
arm_compute::ICLTensor& input0 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& d
void ClLogicalAndWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalAndWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalAndWorkload_Execute", this->GetGuid());
m_LogicalAndLayer.run();
}
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index 475e57f8dc..4e95fcd266 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -33,6 +33,12 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalNotWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1);
arm_compute::ICLTensor& input = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor
void ClLogicalNotWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalNotWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalNotWorkload_Execute", this->GetGuid());
m_LogicalNotLayer.run();
}
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index 355310ef5a..b4eb11cb4d 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -36,6 +36,12 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalOrWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1);
arm_compute::ICLTensor& input0 = PolymorphicDowncast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& des
void ClLogicalOrWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalOrWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalOrWorkload_Execute", this->GetGuid());
m_LogicalOrLayer.run();
}
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index 908f20bfe5..709b14528e 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -24,6 +24,12 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<LstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLstmFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
// Basic parameters
@@ -254,7 +260,7 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
void ClLstmFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClLstmFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLstmFloatWorkload_Execute", this->GetGuid());
RunClFunction(m_LstmLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index 0aa15e5dd3..5a19c6949c 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -52,7 +52,7 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
void ClMaximumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClMaximumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMaximumWorkload_Execute", this->GetGuid());
RunClFunction(m_MaximumLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 4cc0f7c1c2..cd79d04612 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -16,16 +16,16 @@ using namespace armcomputetensorutils;
arm_compute::Status ClMeanValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc)
+ const MeanDescriptor& descriptor)
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_Axis);
+ descriptor.m_Axis);
- return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+ return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
}
ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
@@ -33,6 +33,11 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<MeanQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClMeanWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +52,7 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
void ClMeanWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMeanWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
index 04e9fe23f2..c9229acf17 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.hpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
arm_compute::Status ClMeanValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc);
+ const MeanDescriptor& descriptor);
class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 4924002432..22e928763d 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -52,7 +52,7 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
void ClMinimumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClMinimumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMinimumWorkload_Execute", this->GetGuid());
RunClFunction(m_MinimumLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index 2bd1e1615a..b0b71ce3f5 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -75,7 +75,7 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDesc
void ClMultiplicationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMultiplicationWorkload_Execute", this->GetGuid());
RunClFunction(m_PixelWiseMultiplication, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp
index 7505ab608a..fb5b040dec 100644
--- a/src/backends/cl/workloads/ClNegWorkload.cpp
+++ b/src/backends/cl/workloads/ClNegWorkload.cpp
@@ -38,7 +38,7 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
void ClNegWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClNegWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNegWorkload_Execute", this->GetGuid());
RunClFunction(m_NegLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
index e9b2caf6ee..9c6e0a1e97 100644
--- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp
@@ -33,6 +33,12 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu
const arm_compute::CLCompileContext& clCompileContext)
: FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -49,7 +55,7 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu
void ClNormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNormalizationFloatWorkload_Execute", this->GetGuid());
RunClFunction(m_NormalizationLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp
index 533855c295..10c8907d43 100644
--- a/src/backends/cl/workloads/ClPadWorkload.cpp
+++ b/src/backends/cl/workloads/ClPadWorkload.cpp
@@ -20,6 +20,12 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PadQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPadWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
@@ -40,7 +46,7 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor,
void ClPadWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPadWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp
index 5aadc7629e..c7efe7a7ed 100644
--- a/src/backends/cl/workloads/ClPermuteWorkload.cpp
+++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp
@@ -31,6 +31,12 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPermuteWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
using armcomputetensorutils::BuildArmComputePermutationVector;
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -45,7 +51,7 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor,
void ClPermuteWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());
RunClFunction(m_PermuteFunction, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
index c7cc10218a..ff441ef915 100644
--- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp
@@ -33,6 +33,12 @@ ClPooling2dWorkload::ClPooling2dWorkload(
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPooling2dWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -55,7 +61,7 @@ ClPooling2dWorkload::ClPooling2dWorkload(
void ClPooling2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPooling2dWorkload_Execute", this->GetGuid());
RunClFunction(m_PoolingLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp
index 9b45441b02..beb9e43573 100644
--- a/src/backends/cl/workloads/ClPreluWorkload.cpp
+++ b/src/backends/cl/workloads/ClPreluWorkload.cpp
@@ -42,7 +42,7 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor,
void ClPreluWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClPreluWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPreluWorkload_Execute", this->GetGuid());
RunClFunction(m_PreluLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp
index 0ae371575b..d7c7af7e10 100644
--- a/src/backends/cl/workloads/ClQLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp
@@ -19,6 +19,12 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClQLstmWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ICLTensor> qLstmParams;
// Mandatory params
@@ -231,6 +237,7 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor,
void ClQLstmWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid());
m_QLstmLayer.run();
}
diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
index 527c64013b..dc668fd6b4 100644
--- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp
@@ -44,7 +44,7 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor
void ClQuantizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizeWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
index d50414b1cf..7bacf70a6a 100644
--- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
+++ b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp
@@ -137,7 +137,7 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip
void ClQuantizedLstmWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizedLstmWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid());
RunClFunction(m_QuantizedLstmLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index c9c2e0a400..1a7bc64420 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -17,28 +17,28 @@ using namespace armcomputetensorutils;
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc)
+ const ReduceDescriptor& descriptor)
{
- if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty())
+ if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
{
- const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_vAxis);
+ descriptor.m_vAxis);
return arm_compute::CLReductionOperation::validate(&aclInputInfo,
&aclOutputInfo,
static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+ ConvertReductionOperationToAcl(descriptor),
+ descriptor.m_KeepDims);
}
else
{
// Validate layer if there are multiple axes.
arm_compute::Status status;
- IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, desc, status);
+ IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status);
return status;
}
}
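For more than one reduction axis, IS_MULTI_AXES_REDUCE_SUPPORTED re-enters ClReduceWorkloadValidate once per axis. Conceptually it behaves like the sketch below, under the assumption that the macro decomposes the reduce into a chain of single-axis reduces; ComputeIntermediateInfo is a hypothetical helper standing in for the intermediate shape arithmetic:

    arm_compute::Status ValidateMultiAxesReduce(const TensorInfo& input,
                                                const ReduceDescriptor& descriptor)
    {
        TensorInfo current = input;
        for (uint32_t axis : descriptor.m_vAxis)
        {
            ReduceDescriptor singleAxis = descriptor;
            singleAxis.m_vAxis = { axis };
            // Hypothetical helper: the tensor info after reducing 'axis'.
            TensorInfo reduced = ComputeIntermediateInfo(current, axis, descriptor.m_KeepDims);
            // Single-axis call takes the fast path above, so this does not recurse.
            arm_compute::Status status = ClReduceWorkloadValidate(current, reduced, singleAxis);
            if (status.error_code() != arm_compute::ErrorCode::OK)
            {
                return status;
            }
            current = reduced;
        }
        return arm_compute::Status{};
    }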
@@ -46,6 +46,12 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClReduceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -63,7 +69,7 @@ ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, cons
void ClReduceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReduceWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp b/src/backends/cl/workloads/ClReduceWorkload.hpp
index 8481eeea5a..8b0aadb1ae 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.hpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc);
+ const ReduceDescriptor& descriptor);
class ClReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp
index 1f82cfbee2..b9b92a8910 100644
--- a/src/backends/cl/workloads/ClReshapeWorkload.cpp
+++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp
@@ -36,7 +36,7 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor,
void ClReshapeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReshapeWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp
index 3406011d04..0c2b930039 100644
--- a/src/backends/cl/workloads/ClResizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClResizeWorkload.cpp
@@ -51,6 +51,12 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClResizeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -81,7 +87,7 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor,
void ClResizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClResizeWorkload_Execute", this->GetGuid());
RunClFunction(m_ResizeLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
index a3a04c11eb..8d48bfad33 100644
--- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp
+++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp
@@ -38,7 +38,7 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor,
void ClRsqrtWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClRsqrtWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClRsqrtWorkload_Execute", this->GetGuid());
RunClFunction(m_RsqrtLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSinWorkload.cpp b/src/backends/cl/workloads/ClSinWorkload.cpp
index 17572c657b..dcde349d8d 100644
--- a/src/backends/cl/workloads/ClSinWorkload.cpp
+++ b/src/backends/cl/workloads/ClSinWorkload.cpp
@@ -38,7 +38,7 @@ ClSinWorkload::ClSinWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
void ClSinWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSinWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSinWorkload_Execute", this->GetGuid());
RunClFunction(m_SinLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index 16271961f9..6f3c1a9402 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -35,6 +35,12 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
arm_compute::ICLTensor& input = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -50,7 +56,7 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
void ClSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSliceWorkload_Execute", this->GetGuid());
RunClFunction(m_SliceFunction, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 4547c682c9..0b7b10d7b0 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
: BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
, m_SoftmaxLayer(memoryManager)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClSoftmaxWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<ClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
void ClSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSoftmaxWorkload_Execute", this->GetGuid());
RunClFunction(m_SoftmaxLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 3aa8ebd2a8..70166192e5 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -50,6 +50,12 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToBatchNdWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
arm_compute::ICLTensor& input =
@@ -81,7 +87,7 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
void ClSpaceToBatchNdWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToBatchNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToBatchNdWorkload_Execute", this->GetGuid());
RunClFunction(m_SpaceToBatchLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index 67487c4bf1..119605a02b 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -17,11 +17,16 @@ namespace armnn
{
using namespace armcomputetensorutils;
-ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
const WorkloadInfo& info,
const arm_compute::CLCompileContext& clCompileContext)
- : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+ : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToDepthWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -29,7 +34,7 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
@@ -39,18 +44,18 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
void ClSpaceToDepthWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToDepthWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToDepthWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const SpaceToDepthDescriptor& desc)
+ const SpaceToDepthDescriptor& descriptor)
{
- DataLayout dataLayout = desc.m_DataLayout;
+ DataLayout dataLayout = descriptor.m_DataLayout;
const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
index b782bbe24d..3674bda1b6 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
{
arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const SpaceToDepthDescriptor& desc);
+ const SpaceToDepthDescriptor& descriptor);
class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
{
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index 8eb58c967e..b1ab17d6d2 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -57,6 +57,11 @@ ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor
const arm_compute::CLCompileContext&)
: BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSplitterWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
bool allOutputsAreSubtensors = true;
// Check that all outputs are sub-tensors
@@ -109,7 +114,7 @@ void ClSplitterWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSplitterWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
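ClSplitterWorkload (and ClStackWorkload below) only construct m_Layer when there is real work to do; for the splitter, when every output can be expressed as a sub-tensor view of the input, no CL function is configured. The guard therefore also gates the profiling event, so a no-op split emits no timed event. The executed path, annotated:

    void ClSplitterWorkload::Execute() const
    {
        if (m_Layer) // set only when the outputs are not all sub-tensor views
        {
            // The timed event is emitted only when the CL function actually runs.
            ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSplitterWorkload_Execute", this->GetGuid());
            m_Layer->run();
        }
    }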
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index 38c76eb648..5070356dee 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -49,6 +49,12 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StackQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStackWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
std::vector<arm_compute::ICLTensor*> aclInputs;
for (auto input : m_Data.m_Inputs)
{
@@ -67,7 +73,7 @@ void ClStackWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClStackWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStackWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index adf32ce1fc..51a77c54ad 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -57,6 +57,12 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStridedSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1);
arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -92,7 +98,7 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
void ClStridedSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClStridedSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStridedSliceWorkload_Execute", this->GetGuid());
RunClFunction(m_StridedSliceLayer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index e320fec342..6465e3e050 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -36,7 +36,7 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
void ClSubtractionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSubtractionWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index b40b4b10ca..0b5c7c628b 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -61,6 +61,23 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info),
m_Layer(memoryManager)
{
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeConvolution2dWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
@@ -98,7 +115,7 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
void ClTransposeConvolution2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClTransposeConvolution2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTransposeConvolution2dWorkload_Execute", this->GetGuid());
RunClFunction(m_Layer, CHECK_LOCATION());
}
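Workloads with constant weights report an enriched WorkloadInfo: the incoming info is copied and the weight (and, when the bias is enabled, bias) tensor infos are attached as armnn::Optional values before reporting. The same pattern reduced to its skeleton, with MyConvWorkload as a placeholder label and the descriptor members taken from the transposed-convolution constructor above:

    WorkloadInfo detailsInfo;
    detailsInfo.m_InputTensorInfos  = info.m_InputTensorInfos;
    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
    if (descriptor.m_Parameters.m_BiasEnabled)
    {
        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
    }
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("MyConvWorkload_Construct",
                                         descriptor.m_Parameters,
                                         detailsInfo,   // the enriched copy, not the raw info
                                         this->GetGuid());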
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index 7ef502eb8d..d80eae87ea 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -31,6 +31,12 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descrip
const arm_compute::CLCompileContext& clCompileContext)
: BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -45,7 +51,7 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescriptor& descrip
void ClTransposeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL(GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());
RunClFunction(m_PermuteFunction, CHECK_LOCATION());
}
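ClTransposeWorkload is the one workload in this patch whose event label is composed at runtime from GetName() rather than written as a string literal; the label tracks the workload name automatically, at the cost of building a temporary std::string on each Execute(). Both forms side by side:

    // Runtime-composed label (ClTransposeWorkload):
    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());

    // Literal label (every other workload in this patch):
    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTransposeWorkload_Execute", this->GetGuid());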