From bcd860a30eba22bb2ba0943ad705734ce0ec5b23 Mon Sep 17 00:00:00 2001
From: Keith Davis
Date: Thu, 5 Aug 2021 14:20:33 +0100
Subject: IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in CL

Signed-off-by: Keith Davis
Change-Id: I92dd410da7ad633a46d025fdc2b26093041c439b
---
 src/backends/cl/workloads/ClAbsWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClActivationWorkload.cpp | 10 ++++++--
 src/backends/cl/workloads/ClAdditionWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClArgMinMaxWorkload.cpp | 8 +++++-
 .../ClBatchNormalizationFloatWorkload.cpp | 24 ++++++++++-------
 .../ClBatchNormalizationFloatWorkload.hpp | 2 +-
 .../cl/workloads/ClBatchToSpaceNdWorkload.cpp | 25 +++++++++++-------
 .../cl/workloads/ClBatchToSpaceNdWorkload.hpp | 2 +-
 src/backends/cl/workloads/ClCastWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClComparisonWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClConcatWorkload.cpp | 12 ++++++---
 src/backends/cl/workloads/ClConstantWorkload.cpp | 2 +-
 .../cl/workloads/ClConvertFp16ToFp32Workload.cpp | 2 +-
 .../cl/workloads/ClConvertFp32ToFp16Workload.cpp | 2 +-
 .../cl/workloads/ClConvolution2dWorkload.cpp | 2 +-
 .../cl/workloads/ClDepthToSpaceWorkload.cpp | 20 ++++++++++-----
 .../cl/workloads/ClDepthToSpaceWorkload.hpp | 2 +-
 .../workloads/ClDepthwiseConvolutionWorkload.cpp | 25 +++++++++++++++---
 src/backends/cl/workloads/ClDequantizeWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClDivisionWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClExpWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClFillWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClFloorFloatWorkload.cpp | 2 +-
 .../cl/workloads/ClFullyConnectedWorkload.cpp | 30 +++++++++++++++++-----
 src/backends/cl/workloads/ClGatherWorkload.cpp | 8 +++++-
 .../workloads/ClInstanceNormalizationWorkload.cpp | 8 +++++-
 .../workloads/ClL2NormalizationFloatWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClLogWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClLogicalAndWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClLogicalNotWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClLogicalOrWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClLstmFloatWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClMaximumWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClMeanWorkload.cpp | 13 +++++++---
 src/backends/cl/workloads/ClMeanWorkload.hpp | 2 +-
 src/backends/cl/workloads/ClMinimumWorkload.cpp | 2 +-
 .../cl/workloads/ClMultiplicationWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClNegWorkload.cpp | 2 +-
 .../cl/workloads/ClNormalizationFloatWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClPadWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClPermuteWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClPooling2dWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClPreluWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClQLstmWorkload.cpp | 7 +++++
 src/backends/cl/workloads/ClQuantizeWorkload.cpp | 2 +-
 .../cl/workloads/ClQuantizedLstmWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClReduceWorkload.cpp | 22 ++++++++++------
 src/backends/cl/workloads/ClReduceWorkload.hpp | 2 +-
 src/backends/cl/workloads/ClReshapeWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClResizeWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClRsqrtWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClSinWorkload.cpp | 2 +-
 src/backends/cl/workloads/ClSliceWorkload.cpp | 8 +++++-
 src/backends/cl/workloads/ClSoftmaxWorkload.cpp | 8 +++++-
 .../cl/workloads/ClSpaceToBatchNdWorkload.cpp | 8 +++++-
 .../cl/workloads/ClSpaceToDepthWorkload.cpp | 19 +++++++++-----
 .../cl/workloads/ClSpaceToDepthWorkload.hpp | 2 +-
 src/backends/cl/workloads/ClSplitterWorkload.cpp | 7 ++++-
 src/backends/cl/workloads/ClStackWorkload.cpp | 8 +++++-
 .../cl/workloads/ClStridedSliceWorkload.cpp | 8 +++++-
 .../cl/workloads/ClSubtractionWorkload.cpp | 2 +-
 .../workloads/ClTransposeConvolution2dWorkload.cpp | 19 +++++++++++++-
 src/backends/cl/workloads/ClTransposeWorkload.cpp | 8 +++++-
 64 files changed, 360 insertions(+), 113 deletions(-)
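Note for reviewers: every file below applies the same two-step pattern. At construction time the workload reports its descriptor parameters, tensor infos and GUID once via ARMNN_REPORT_PROFILING_WORKLOAD_DESC; at execution time the scoped event macro gains a _GUID suffix and carries the same GUID, so each run can be matched back to the construct-time description. A minimal sketch of that pattern follows; the two macro names are taken from this patch, while ClFooWorkload, FooQueueDescriptor and m_FooLayer are hypothetical names used only for illustration:

    // Hypothetical CL workload, shown only to illustrate the pattern applied below.
    ClFooWorkload::ClFooWorkload(const FooQueueDescriptor& descriptor,
                                 const WorkloadInfo& info,
                                 const arm_compute::CLCompileContext& clCompileContext)
        : BaseWorkload<FooQueueDescriptor>(descriptor, info)
    {
        // Construct-time: report parameters, tensor infos and the workload GUID once.
        ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFooWorkload_Construct",
                                             descriptor.m_Parameters,
                                             info,
                                             this->GetGuid());

        m_Data.ValidateInputsOutputs("ClFooWorkload", 1, 1);

        arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
        arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
        m_FooLayer.configure(clCompileContext, &input, &output);
    }

    void ClFooWorkload::Execute() const
    {
        // Execute-time: the scoped CL event now carries the same GUID, linking
        // each run back to the construct-time description.
        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFooWorkload_Execute", this->GetGuid());
        RunClFunction(m_FooLayer, CHECK_LOCATION());
    }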
diff --git a/src/backends/cl/workloads/ClAbsWorkload.cpp b/src/backends/cl/workloads/ClAbsWorkload.cpp
index 4682c646d1..fa8e4f737f 100644
--- a/src/backends/cl/workloads/ClAbsWorkload.cpp
+++ b/src/backends/cl/workloads/ClAbsWorkload.cpp
@@ -39,7 +39,7 @@ ClAbsWorkload::ClAbsWorkload(const AbsQueueDescriptor& descriptor,
 
 void ClAbsWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClAbsWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAbsWorkload_Execute", this->GetGuid());
     RunClFunction(m_AbsLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClActivationWorkload.cpp b/src/backends/cl/workloads/ClActivationWorkload.cpp
index e2f64a9d7e..20a65b680e 100644
--- a/src/backends/cl/workloads/ClActivationWorkload.cpp
+++ b/src/backends/cl/workloads/ClActivationWorkload.cpp
@@ -34,19 +34,25 @@ ClActivationWorkload::ClActivationWorkload(const ActivationQueueDescriptor& descriptor,
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClActivationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClActivationWorkload", 1, 1);
 
     const arm_compute::ActivationLayerInfo activationLayerInfo =
         ConvertActivationDescriptorToAclActivationLayerInfo(m_Data.m_Parameters);
 
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     m_ActivationLayer.configure(clCompileContext, &input, &output, activationLayerInfo);
 }
 
 void ClActivationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClActivationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClActivationWorkload_Execute", this->GetGuid());
     RunClFunction(m_ActivationLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClAdditionWorkload.cpp b/src/backends/cl/workloads/ClAdditionWorkload.cpp
index 4793cc6f8f..9bef0603e1 100644
--- a/src/backends/cl/workloads/ClAdditionWorkload.cpp
+++ b/src/backends/cl/workloads/ClAdditionWorkload.cpp
@@ -36,7 +36,7 @@ ClAdditionWorkload::ClAdditionWorkload(const AdditionQueueDescriptor& descriptor,
 
 void ClAdditionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClAdditionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClAdditionWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
index 7475cfa315..78646a7f86 100644
--- a/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClArgMinMaxWorkload.cpp
@@ -57,6 +57,12 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
                                          const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClArgMinMaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(this->m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
 
@@ -80,7 +86,7 @@ ClArgMinMaxWorkload::ClArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
 
 void ClArgMinMaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClArgMinMaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClArgMinMaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_ArgMinMaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 361d6f87a5..8367d7e266 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -22,21 +22,21 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
                                                  const TensorInfo& var,
                                                  const TensorInfo& beta,
                                                  const TensorInfo& gamma,
-                                                 const BatchNormalizationDescriptor& desc,
+                                                 const BatchNormalizationDescriptor& descriptor,
                                                  const ActivationDescriptor* activationDescriptor)
 {
     const arm_compute::TensorInfo aclInputInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(input, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclOutputInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(output, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclMeanInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(mean, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(mean, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclVarInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(var, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(var, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclBetaInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(beta, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(beta, descriptor.m_DataLayout);
     const arm_compute::TensorInfo aclGammaInfo =
-        armcomputetensorutils::BuildArmComputeTensorInfo(gamma, desc.m_DataLayout);
+        armcomputetensorutils::BuildArmComputeTensorInfo(gamma, descriptor.m_DataLayout);
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
         activationDescriptor);
@@ -47,7 +47,7 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
                                                         &aclVarInfo,
                                                         &aclBetaInfo,
                                                         &aclGammaInfo,
-                                                        desc.m_Eps,
+                                                        descriptor.m_Eps,
                                                         activationInfo);
 }
 
@@ -57,6 +57,12 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
     const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchNormalizationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Mean = std::make_unique<arm_compute::CLTensor>();
     BuildArmComputeTensor(*m_Mean, m_Data.m_Mean->GetTensorInfo());
 
@@ -103,7 +109,7 @@ ClBatchNormalizationFloatWorkload::ClBatchNormalizationFloatWorkload(
 
 void ClBatchNormalizationFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchNormalizationFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchNormalizationFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
index c9f1f7f295..0ba2d97e8f 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp
@@ -19,7 +19,7 @@ arm_compute::Status ClBatchNormalizationValidate(const TensorInfo& input,
                                                  const TensorInfo& var,
                                                  const TensorInfo& beta,
                                                  const TensorInfo& gamma,
-                                                 const BatchNormalizationDescriptor& desc,
+                                                 const BatchNormalizationDescriptor& descriptor,
                                                  const ActivationDescriptor* activationDescriptor = nullptr);
 
 class ClBatchNormalizationFloatWorkload : public FloatWorkload<BatchNormalizationQueueDescriptor>
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
index b9736db642..8eef587644 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.cpp
@@ -17,11 +17,17 @@ namespace armnn
 {
 using namespace armcomputetensorutils;
 
-ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info,
                                                    const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+    : BaseWorkload<BatchToSpaceNdQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchToSpaceNdWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClBatchToSpaceNdWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -30,8 +36,8 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
     input.info()->set_data_layout(aclDataLayout);
 
     // ArmNN blockShape is [H, W] Cl asks for W, H
-    int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
-    int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+    int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[0]);
+    int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[1]);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
@@ -41,19 +47,20 @@ ClBatchToSpaceNdWorkload::ClBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
 
 void ClBatchToSpaceNdWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClBatchToSpaceNdWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchToSpaceNdWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
 arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
                                                      const TensorInfo& output,
-                                                     const BatchToSpaceNdDescriptor& desc) {
-    DataLayout dataLayout = desc.m_DataLayout;
+                                                     const BatchToSpaceNdDescriptor& descriptor)
+{
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
     // ArmNN blockShape is [H, W] Cl asks for W, H
-    int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_BlockShape[0]);
-    int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+    int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+    int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
diff --git a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
index 2262f33c73..7ef8300f89 100644
--- a/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
+++ b/src/backends/cl/workloads/ClBatchToSpaceNdWorkload.hpp
@@ -13,7 +13,7 @@ namespace armnn
 
 arm_compute::Status ClBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
                                                      const TensorInfo& output,
-                                                     const BatchToSpaceNdDescriptor& desc);
+                                                     const BatchToSpaceNdDescriptor& descriptor);
 
 class ClBatchToSpaceNdWorkload : public BaseWorkload<BatchToSpaceNdQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClCastWorkload.cpp b/src/backends/cl/workloads/ClCastWorkload.cpp
index e995e42386..07b76dc064 100644
--- a/src/backends/cl/workloads/ClCastWorkload.cpp
+++ b/src/backends/cl/workloads/ClCastWorkload.cpp
@@ -40,7 +40,7 @@ ClCastWorkload::ClCastWorkload(const CastQueueDescriptor& descriptor,
 
 void ClCastWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClCastWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClCastWorkload_Execute", this->GetGuid());
     RunClFunction(m_CastLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClComparisonWorkload.cpp b/src/backends/cl/workloads/ClComparisonWorkload.cpp
index 35e6d68733..d83682d81b 100644
--- a/src/backends/cl/workloads/ClComparisonWorkload.cpp
+++ b/src/backends/cl/workloads/ClComparisonWorkload.cpp
@@ -44,6 +44,12 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClComparisonWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClComparisonWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -57,7 +63,7 @@ ClComparisonWorkload::ClComparisonWorkload(const ComparisonQueueDescriptor& descriptor,
 
 void ClComparisonWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClComparisonWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClComparisonWorkload_Execute", this->GetGuid());
     RunClFunction(m_ComparisonLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConcatWorkload.cpp b/src/backends/cl/workloads/ClConcatWorkload.cpp
index 1c2d476e0c..233fd19542 100644
--- a/src/backends/cl/workloads/ClConcatWorkload.cpp
+++ b/src/backends/cl/workloads/ClConcatWorkload.cpp
@@ -18,9 +18,9 @@ using namespace armcomputetensorutils;
 
 namespace
 {
-size_t CalcAxis(const OriginsDescriptor& desc)
+size_t CalcAxis(const OriginsDescriptor& descriptor)
 {
-    return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+    return (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
 }
 } //namespace
 
@@ -50,6 +50,12 @@ ClConcatWorkload::ClConcatWorkload(const ConcatQueueDescriptor& descriptor,
                                    const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConcatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     bool allInputsAreSubtensors = true;
 
     // Check that all inputs are sub-tensors
@@ -95,7 +101,7 @@ void ClConcatWorkload::Execute() const
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClConcatWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConcatWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClConstantWorkload.cpp b/src/backends/cl/workloads/ClConstantWorkload.cpp
index 60dcd59268..1ff7504058 100644
--- a/src/backends/cl/workloads/ClConstantWorkload.cpp
+++ b/src/backends/cl/workloads/ClConstantWorkload.cpp
@@ -51,7 +51,7 @@ ClConstantWorkload::ClConstantWorkload(const ConstantQueueDescriptor& descriptor,
 
 void ClConstantWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConstantWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConstantWorkload_Execute", this->GetGuid());
 
     // The intermediate tensor held by the corresponding layer output handler can be initialised with the given data
     // on the first inference, then reused for subsequent inferences.
diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
index aaffd83741..455ec1af13 100644
--- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp
@@ -30,7 +30,7 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload(
 
 void ClConvertFp16ToFp32Workload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp16ToFp32Workload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp16ToFp32Workload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
index a9f1d91bcf..8e6b0cea67 100644
--- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
+++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp
@@ -30,7 +30,7 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload(
 
 void ClConvertFp32ToFp16Workload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClConvertFp32ToFp16Workload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClConvertFp32ToFp16Workload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index ab9d5bcbd2..12a47dcd94 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -132,7 +132,7 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
     }
 
     // Report Profiling Details
-    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Execute_Guid",
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClConvolution2dWorkload_Construct",
                                          descriptor.m_Parameters,
                                          detailsInfo,
                                          this->GetGuid());
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
index d42b261a10..aeab0293c1 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.cpp
@@ -21,12 +21,12 @@ using namespace armcomputetensorutils;
 
 arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const DepthToSpaceDescriptor& desc)
+                                                   const DepthToSpaceDescriptor& descriptor)
 {
-    DataLayout dataLayout = desc.m_DataLayout;
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
@@ -36,11 +36,17 @@ arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
     return aclStatus;
 }
 
-ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
+ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
                                                const WorkloadInfo& info,
                                                const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
+    : BaseWorkload<DepthToSpaceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthToSpaceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClDepthToSpaceWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -49,7 +55,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
         PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
 
     arm_compute::ICLTensor& output =
         PolymorphicPointerDowncast<IClTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
@@ -60,7 +66,7 @@ ClDepthToSpaceWorkload::ClDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
 
 void ClDepthToSpaceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthToSpaceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthToSpaceWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
index 6cb8bb5e9e..01f83331c5 100644
--- a/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
+++ b/src/backends/cl/workloads/ClDepthToSpaceWorkload.hpp
@@ -16,7 +16,7 @@ namespace armnn
 
 arm_compute::Status ClDepthToSpaceWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const DepthToSpaceDescriptor& desc);
+                                                   const DepthToSpaceDescriptor& descriptor);
 
 class ClDepthToSpaceWorkload : public BaseWorkload<DepthToSpaceQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
index 9a9977bd54..9592b37f9d 100644
--- a/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDepthwiseConvolutionWorkload.cpp
@@ -78,15 +78,32 @@ ClDepthwiseConvolutionWorkload::ClDepthwiseConvolutionWorkload(
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<DepthwiseConvolution2dQueueDescriptor>(descriptor, info)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClDepthwiseConvolutionWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M]
     // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
     ConstTensor weightPermuted;
     unsigned int depthMultiplier;
     std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[m_Data.m_Weight->GetTensorInfo().GetNumBytes()]);
     std::tie(weightPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
-                                                                      info.m_InputTensorInfos[0],
-                                                                      m_Data.m_Parameters.m_DataLayout,
-                                                                      permuteBuffer.get());
+                                                                       info.m_InputTensorInfos[0],
+                                                                       m_Data.m_Parameters.m_DataLayout,
+                                                                       permuteBuffer.get());
 
     // Convert the weights into the compute library format
     m_KernelTensor = std::make_unique<arm_compute::CLTensor>();
@@ -151,7 +168,7 @@ void ClDepthwiseConvolutionWorkload::FreeUnusedTensors()
 
 void ClDepthwiseConvolutionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDepthwiseConvolutionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDepthwiseConvolutionWorkload_Execute", this->GetGuid());
     ARMNN_ASSERT(m_DepthwiseConvolutionLayer);
 
     RunClFunction(*m_DepthwiseConvolutionLayer, CHECK_LOCATION());
diff --git a/src/backends/cl/workloads/ClDequantizeWorkload.cpp b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
index 42cc579a8c..6bdeaa8fec 100644
--- a/src/backends/cl/workloads/ClDequantizeWorkload.cpp
+++ b/src/backends/cl/workloads/ClDequantizeWorkload.cpp
@@ -49,7 +49,7 @@ void ClDequantizeWorkload::Execute() const
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClDequantizeWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDequantizeWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
diff --git a/src/backends/cl/workloads/ClDivisionWorkload.cpp b/src/backends/cl/workloads/ClDivisionWorkload.cpp
index 76220a1b64..d444a192cb 100644
--- a/src/backends/cl/workloads/ClDivisionWorkload.cpp
+++ b/src/backends/cl/workloads/ClDivisionWorkload.cpp
@@ -49,7 +49,7 @@ ClDivisionWorkload::ClDivisionWorkload(const DivisionQueueDescriptor& descriptor,
 
 void ClDivisionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClDivisionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClDivisionWorkload_Execute", this->GetGuid());
     RunClFunction(m_ArithmeticDivision, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClExpWorkload.cpp b/src/backends/cl/workloads/ClExpWorkload.cpp
index 60c383f8bf..9c1f0368a3 100644
--- a/src/backends/cl/workloads/ClExpWorkload.cpp
+++ b/src/backends/cl/workloads/ClExpWorkload.cpp
@@ -28,6 +28,12 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
                              const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClExpWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClExpWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -38,7 +44,7 @@ ClExpWorkload::ClExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
 
 void ClExpWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClExpWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExpWorkload_Execute", this->GetGuid());
     RunClFunction(m_ExpLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFillWorkload.cpp b/src/backends/cl/workloads/ClFillWorkload.cpp
index a2204fa42d..8cb2db4b25 100644
--- a/src/backends/cl/workloads/ClFillWorkload.cpp
+++ b/src/backends/cl/workloads/ClFillWorkload.cpp
@@ -20,6 +20,12 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<FillQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFillWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClFillWorkload", 1, 1);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(this->m_Data.m_Outputs[0])->GetTensor();
@@ -30,7 +36,7 @@ ClFillWorkload::ClFillWorkload(const FillQueueDescriptor& descriptor,
 
 void ClFillWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFillWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFillWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
index 3915270c24..d2b487169e 100644
--- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp
@@ -35,7 +35,7 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descriptor,
 
 void ClFloorFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFloorFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFloorFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index d1d911ac13..a0889e1b60 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -28,10 +28,10 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
     const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
 
     arm_compute::TensorInfo aclBiases;
-    arm_compute::TensorInfo *optionalAclBiases = nullptr;
+    arm_compute::TensorInfo* optionalAclBiases = nullptr;
     if (descriptor.m_BiasEnabled)
     {
-        aclBiases = BuildArmComputeTensorInfo(biases);
+        aclBiases  = BuildArmComputeTensorInfo(biases);
         optionalAclBiases = &aclBiases;
     }
 
@@ -50,9 +50,25 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
     const WorkloadInfo& info,
     std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
     const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
-    , m_FullyConnectedLayer(memoryManager)
+    : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info), m_FullyConnectedLayer(memoryManager)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFullyConnectedWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
     BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
 
@@ -64,13 +80,13 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
     m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
 
-    arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+    arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
 
     const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
 
     arm_compute::FullyConnectedLayerInfo fc_info =
-            ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+        ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
 
     m_FullyConnectedLayer.configure(clCompileContext,
                                     &input,
@@ -94,7 +110,7 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
 
 void ClFullyConnectedWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFullyConnectedWorkload_Execute", this->GetGuid());
     RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClGatherWorkload.cpp b/src/backends/cl/workloads/ClGatherWorkload.cpp
index 98dfe7bc81..7c8d1ab787 100644
--- a/src/backends/cl/workloads/ClGatherWorkload.cpp
+++ b/src/backends/cl/workloads/ClGatherWorkload.cpp
@@ -31,6 +31,12 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
                                    const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<GatherQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClGatherWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClGatherWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@ ClGatherWorkload::ClGatherWorkload(const GatherQueueDescriptor& descriptor,
 
 void ClGatherWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClGatherWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClGatherWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 } // namespace armnn
diff --git a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
index a0e8e7b87d..a4f20c5b6c 100644
--- a/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
+++ b/src/backends/cl/workloads/ClInstanceNormalizationWorkload.cpp
@@ -35,6 +35,12 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClInstanceNormalizationWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClInstanceNormalizationWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -54,7 +60,7 @@ ClInstanceNormalizationWorkload::ClInstanceNormalizationWorkload(
 
 void ClInstanceNormalizationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClInstanceNormalizationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClInstanceNormalizationWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
index 984f21a4db..953ff4aa9f 100644
--- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp
@@ -31,6 +31,12 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
                                                                const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClL2NormalizationFloatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClL2NormalizationFloatWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClL2NormalizationFloatWorkload::ClL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor,
 
 void ClL2NormalizationFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClL2NormalizationFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClL2NormalizationFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
index 1a255f13f6..6c032111db 100644
--- a/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor,
     : BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
     , m_LogSoftmaxLayer(memoryManager)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogSoftmaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogSoftmaxWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClLogSoftmaxWorkload::ClLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& descriptor,
 
 void ClLogSoftmaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogSoftmaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogSoftmaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_LogSoftmaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogWorkload.cpp b/src/backends/cl/workloads/ClLogWorkload.cpp
index b35345f1ce..180c0afd00 100644
--- a/src/backends/cl/workloads/ClLogWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogWorkload.cpp
@@ -38,7 +38,7 @@ ClLogWorkload::ClLogWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
 
 void ClLogWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogWorkload_Execute", this->GetGuid());
     RunClFunction(m_LogLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
index f04cede2f8..30a187be8a 100644
--- a/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalAndWorkload.cpp
@@ -36,6 +36,12 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalAndWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalAndWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClLogicalAndWorkload::ClLogicalAndWorkload(const LogicalBinaryQueueDescriptor& descriptor,
 
 void ClLogicalAndWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalAndWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalAndWorkload_Execute", this->GetGuid());
     m_LogicalAndLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
index 475e57f8dc..4e95fcd266 100644
--- a/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalNotWorkload.cpp
@@ -33,6 +33,12 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
                                            const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalNotWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalNotWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClLogicalNotWorkload::ClLogicalNotWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
 
 void ClLogicalNotWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalNotWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalNotWorkload_Execute", this->GetGuid());
     m_LogicalNotLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
index 355310ef5a..b4eb11cb4d 100644
--- a/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
+++ b/src/backends/cl/workloads/ClLogicalOrWorkload.cpp
@@ -36,6 +36,12 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
                                          const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLogicalOrWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClLogicalOrWorkload", 2, 1);
 
     arm_compute::ICLTensor& input0 = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ ClLogicalOrWorkload::ClLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
 
 void ClLogicalOrWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLogicalOrWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLogicalOrWorkload_Execute", this->GetGuid());
     m_LogicalOrLayer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
index 908f20bfe5..709b14528e 100644
--- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp
@@ -24,6 +24,12 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
                                          const arm_compute::CLCompileContext& clCompileContext)
     : FloatWorkload<LstmQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClLstmFloatWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     arm_compute::LSTMParams<arm_compute::ICLTensor> lstm_param;
 
     // Basic parameters
@@ -254,7 +260,7 @@ ClLstmFloatWorkload::ClLstmFloatWorkload(const LstmQueueDescriptor &descriptor,
 
 void ClLstmFloatWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClLstmFloatWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClLstmFloatWorkload_Execute", this->GetGuid());
     RunClFunction(m_LstmLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMaximumWorkload.cpp b/src/backends/cl/workloads/ClMaximumWorkload.cpp
index 0aa15e5dd3..5a19c6949c 100644
--- a/src/backends/cl/workloads/ClMaximumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMaximumWorkload.cpp
@@ -52,7 +52,7 @@ ClMaximumWorkload::ClMaximumWorkload(const MaximumQueueDescriptor& descriptor,
 
 void ClMaximumWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMaximumWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMaximumWorkload_Execute", this->GetGuid());
     RunClFunction(m_MaximumLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMeanWorkload.cpp b/src/backends/cl/workloads/ClMeanWorkload.cpp
index 4cc0f7c1c2..cd79d04612 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.cpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.cpp
@@ -16,16 +16,16 @@ using namespace armcomputetensorutils;
 
 arm_compute::Status ClMeanValidate(const TensorInfo& input,
                                    const TensorInfo& output,
-                                   const MeanDescriptor& desc)
+                                   const MeanDescriptor& descriptor)
 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
     const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
-                                                                          desc.m_Axis);
+                                                                          descriptor.m_Axis);
 
-    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+    return arm_compute::CLReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
 }
 
 ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
@@ -33,6 +33,11 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<MeanQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClMeanWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     m_Data.ValidateInputsOutputs("ClMeanWorkload", 1, 1);
 
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +52,7 @@ ClMeanWorkload::ClMeanWorkload(const MeanQueueDescriptor& descriptor,
 
 void ClMeanWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMeanWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMeanWorkload_Execute", this->GetGuid());
     m_Layer.run();
 }
 
diff --git a/src/backends/cl/workloads/ClMeanWorkload.hpp b/src/backends/cl/workloads/ClMeanWorkload.hpp
index 04e9fe23f2..c9229acf17 100644
--- a/src/backends/cl/workloads/ClMeanWorkload.hpp
+++ b/src/backends/cl/workloads/ClMeanWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
 
 arm_compute::Status ClMeanValidate(const TensorInfo& input,
                                    const TensorInfo& output,
-                                   const MeanDescriptor& desc);
+                                   const MeanDescriptor& descriptor);
 
 class ClMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClMinimumWorkload.cpp b/src/backends/cl/workloads/ClMinimumWorkload.cpp
index 4924002432..22e928763d 100644
--- a/src/backends/cl/workloads/ClMinimumWorkload.cpp
+++ b/src/backends/cl/workloads/ClMinimumWorkload.cpp
@@ -52,7 +52,7 @@ ClMinimumWorkload::ClMinimumWorkload(const MinimumQueueDescriptor& descriptor,
 
 void ClMinimumWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMinimumWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMinimumWorkload_Execute", this->GetGuid());
     RunClFunction(m_MinimumLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
index 2bd1e1615a..b0b71ce3f5 100644
--- a/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
+++ b/src/backends/cl/workloads/ClMultiplicationWorkload.cpp
@@ -75,7 +75,7 @@ ClMultiplicationWorkload::ClMultiplicationWorkload(const MultiplicationQueueDescriptor& descriptor,
 
 void ClMultiplicationWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClMultiplicationWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClMultiplicationWorkload_Execute", this->GetGuid());
    RunClFunction(m_PixelWiseMultiplication, CHECK_LOCATION());
 }
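Note for reviewers: the convolution, depthwise-convolution and fully-connected hunks above do not hand the incoming WorkloadInfo straight to the report macro. They first copy it into a local detailsInfo and attach the weight and, when enabled, bias TensorInfos so those appear in the profiling description as well. The shape of that convention, condensed from those hunks with the same hypothetical ClFooWorkload constructor body standing in for the real ones:

    // Weights/bias variant of the construct-time report (sketch only).
    WorkloadInfo detailsInfo;
    detailsInfo.m_InputTensorInfos  = info.m_InputTensorInfos;
    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
    if (descriptor.m_Parameters.m_BiasEnabled)
    {
        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
    }

    // detailsInfo replaces the plain info argument in the report macro.
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFooWorkload_Construct",
                                         descriptor.m_Parameters,
                                         detailsInfo,
                                         this->GetGuid());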
a/src/backends/cl/workloads/ClNegWorkload.cpp b/src/backends/cl/workloads/ClNegWorkload.cpp index 7505ab608a..fb5b040dec 100644 --- a/src/backends/cl/workloads/ClNegWorkload.cpp +++ b/src/backends/cl/workloads/ClNegWorkload.cpp @@ -38,7 +38,7 @@ ClNegWorkload::ClNegWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, void ClNegWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClNegWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNegWorkload_Execute", this->GetGuid()); RunClFunction(m_NegLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp index e9b2caf6ee..9c6e0a1e97 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp @@ -33,6 +33,12 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu const arm_compute::CLCompileContext& clCompileContext) : FloatWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClNormalizationWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("ClNormalizationFloatWorkload", 1, 1); arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -49,7 +55,7 @@ ClNormalizationFloatWorkload::ClNormalizationFloatWorkload(const NormalizationQu void ClNormalizationFloatWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClNormalizationFloatWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClNormalizationFloatWorkload_Execute", this->GetGuid()); RunClFunction(m_NormalizationLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClPadWorkload.cpp b/src/backends/cl/workloads/ClPadWorkload.cpp index 533855c295..10c8907d43 100644 --- a/src/backends/cl/workloads/ClPadWorkload.cpp +++ b/src/backends/cl/workloads/ClPadWorkload.cpp @@ -20,6 +20,12 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPadWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + this->m_Data.ValidateInputsOutputs("ClPadWorkload", 1, 1); arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); @@ -40,7 +46,7 @@ ClPadWorkload::ClPadWorkload(const PadQueueDescriptor& descriptor, void ClPadWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPadWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPadWorkload_Execute", this->GetGuid()); RunClFunction(m_Layer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClPermuteWorkload.cpp b/src/backends/cl/workloads/ClPermuteWorkload.cpp index 5aadc7629e..c7efe7a7ed 100644 --- a/src/backends/cl/workloads/ClPermuteWorkload.cpp +++ b/src/backends/cl/workloads/ClPermuteWorkload.cpp @@ -31,6 +31,12 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& descriptor, const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPermuteWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + using armcomputetensorutils::BuildArmComputePermutationVector; m_Data.ValidateInputsOutputs(GetName(), 1, 1); @@ -45,7 +51,7 @@ ClPermuteWorkload::ClPermuteWorkload(const PermuteQueueDescriptor& 
descriptor, void ClPermuteWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL( GetName() + "_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid()); RunClFunction(m_PermuteFunction, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClPooling2dWorkload.cpp b/src/backends/cl/workloads/ClPooling2dWorkload.cpp index c7cc10218a..ff441ef915 100644 --- a/src/backends/cl/workloads/ClPooling2dWorkload.cpp +++ b/src/backends/cl/workloads/ClPooling2dWorkload.cpp @@ -33,6 +33,12 @@ ClPooling2dWorkload::ClPooling2dWorkload( const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClPooling2dWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("ClPooling2dWorkload", 1, 1); arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -55,7 +61,7 @@ ClPooling2dWorkload::ClPooling2dWorkload( void ClPooling2dWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPooling2dWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPooling2dWorkload_Execute", this->GetGuid()); RunClFunction(m_PoolingLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClPreluWorkload.cpp b/src/backends/cl/workloads/ClPreluWorkload.cpp index 9b45441b02..beb9e43573 100644 --- a/src/backends/cl/workloads/ClPreluWorkload.cpp +++ b/src/backends/cl/workloads/ClPreluWorkload.cpp @@ -42,7 +42,7 @@ ClPreluWorkload::ClPreluWorkload(const PreluQueueDescriptor& descriptor, void ClPreluWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClPreluWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClPreluWorkload_Execute", this->GetGuid()); RunClFunction(m_PreluLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClQLstmWorkload.cpp b/src/backends/cl/workloads/ClQLstmWorkload.cpp index 0ae371575b..d7c7af7e10 100644 --- a/src/backends/cl/workloads/ClQLstmWorkload.cpp +++ b/src/backends/cl/workloads/ClQLstmWorkload.cpp @@ -19,6 +19,12 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClQLstmWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + arm_compute::LSTMParams qLstmParams; // Mandatory params @@ -231,6 +237,7 @@ ClQLstmWorkload::ClQLstmWorkload(const QLstmQueueDescriptor& descriptor, void ClQLstmWorkload::Execute() const { + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid()); m_QLstmLayer.run(); } diff --git a/src/backends/cl/workloads/ClQuantizeWorkload.cpp b/src/backends/cl/workloads/ClQuantizeWorkload.cpp index 527c64013b..dc668fd6b4 100644 --- a/src/backends/cl/workloads/ClQuantizeWorkload.cpp +++ b/src/backends/cl/workloads/ClQuantizeWorkload.cpp @@ -44,7 +44,7 @@ ClQuantizeWorkload::ClQuantizeWorkload(const QuantizeQueueDescriptor& descriptor void ClQuantizeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizeWorkload_Execute", this->GetGuid()); RunClFunction(m_Layer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp index d50414b1cf..7bacf70a6a 100644 --- a/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp +++ 
b/src/backends/cl/workloads/ClQuantizedLstmWorkload.cpp @@ -137,7 +137,7 @@ ClQuantizedLstmWorkload::ClQuantizedLstmWorkload(const QuantizedLstmQueueDescrip void ClQuantizedLstmWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClQuantizedLstmWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClQuantizedLstmWorkload_Execute", this->GetGuid()); RunClFunction(m_QuantizedLstmLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp index c9c2e0a400..1a7bc64420 100644 --- a/src/backends/cl/workloads/ClReduceWorkload.cpp +++ b/src/backends/cl/workloads/ClReduceWorkload.cpp @@ -17,28 +17,28 @@ using namespace armcomputetensorutils; arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const ReduceDescriptor& desc) + const ReduceDescriptor& descriptor) { - if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty()) + if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty()) { - const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); + const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), - desc.m_vAxis); + descriptor.m_vAxis); return arm_compute::CLReductionOperation::validate(&aclInputInfo, &aclOutputInfo, static_cast(coords[0]), - ConvertReductionOperationToAcl(desc), - desc.m_KeepDims); + ConvertReductionOperationToAcl(descriptor), + descriptor.m_KeepDims); } else { // Validate layer if there are multiple axes. 
arm_compute::Status status; - IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, desc, status); + IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, descriptor, status); return status; } } @@ -46,6 +46,12 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClReduceWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("ClReduceWorkload", 1, 1); arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -63,7 +69,7 @@ ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, cons void ClReduceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReduceWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReduceWorkload_Execute", this->GetGuid()); m_Layer.run(); } diff --git a/src/backends/cl/workloads/ClReduceWorkload.hpp b/src/backends/cl/workloads/ClReduceWorkload.hpp index 8481eeea5a..8b0aadb1ae 100644 --- a/src/backends/cl/workloads/ClReduceWorkload.hpp +++ b/src/backends/cl/workloads/ClReduceWorkload.hpp @@ -14,7 +14,7 @@ namespace armnn arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const ReduceDescriptor& desc); + const ReduceDescriptor& descriptor); class ClReduceWorkload : public BaseWorkload { diff --git a/src/backends/cl/workloads/ClReshapeWorkload.cpp b/src/backends/cl/workloads/ClReshapeWorkload.cpp index 1f82cfbee2..b9b92a8910 100644 --- a/src/backends/cl/workloads/ClReshapeWorkload.cpp +++ b/src/backends/cl/workloads/ClReshapeWorkload.cpp @@ -36,7 +36,7 @@ ClReshapeWorkload::ClReshapeWorkload(const ReshapeQueueDescriptor& descriptor, void ClReshapeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClReshapeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClReshapeWorkload_Execute", this->GetGuid()); RunClFunction(m_Layer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClResizeWorkload.cpp b/src/backends/cl/workloads/ClResizeWorkload.cpp index 3406011d04..0c2b930039 100644 --- a/src/backends/cl/workloads/ClResizeWorkload.cpp +++ b/src/backends/cl/workloads/ClResizeWorkload.cpp @@ -51,6 +51,12 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, const arm_compute::CLCompileContext& clCompileContext) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClResizeWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("ClResizeWorkload", 1, 1); arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -81,7 +87,7 @@ ClResizeWorkload::ClResizeWorkload(const ResizeQueueDescriptor& descriptor, void ClResizeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_CL("ClResizeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClResizeWorkload_Execute", this->GetGuid()); RunClFunction(m_ResizeLayer, CHECK_LOCATION()); } diff --git a/src/backends/cl/workloads/ClRsqrtWorkload.cpp b/src/backends/cl/workloads/ClRsqrtWorkload.cpp index a3a04c11eb..8d48bfad33 100644 --- a/src/backends/cl/workloads/ClRsqrtWorkload.cpp +++ b/src/backends/cl/workloads/ClRsqrtWorkload.cpp @@ -38,7 +38,7 @@ ClRsqrtWorkload::ClRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, void ClRsqrtWorkload::Execute() 
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClRsqrtWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClRsqrtWorkload_Execute", this->GetGuid());
     RunClFunction(m_RsqrtLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSinWorkload.cpp b/src/backends/cl/workloads/ClSinWorkload.cpp
index 17572c657b..dcde349d8d 100644
--- a/src/backends/cl/workloads/ClSinWorkload.cpp
+++ b/src/backends/cl/workloads/ClSinWorkload.cpp
@@ -38,7 +38,7 @@ ClSinWorkload::ClSinWorkload(const ElementwiseUnaryQueueDescriptor& descriptor,
 
 void ClSinWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSinWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSinWorkload_Execute", this->GetGuid());
     RunClFunction(m_SinLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSliceWorkload.cpp b/src/backends/cl/workloads/ClSliceWorkload.cpp
index 16271961f9..6f3c1a9402 100644
--- a/src/backends/cl/workloads/ClSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClSliceWorkload.cpp
@@ -35,6 +35,12 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
                                  const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<SliceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSliceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSliceWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = PolymorphicDowncast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -50,7 +56,7 @@ ClSliceWorkload::ClSliceWorkload(const SliceQueueDescriptor& descriptor,
 
 void ClSliceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSliceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSliceWorkload_Execute", this->GetGuid());
     RunClFunction(m_SliceFunction, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
index 4547c682c9..0b7b10d7b0 100644
--- a/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
+++ b/src/backends/cl/workloads/ClSoftmaxWorkload.cpp
@@ -32,6 +32,12 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
     : BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
     , m_SoftmaxLayer(memoryManager)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSoftmaxWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSoftmaxWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -43,7 +49,7 @@ ClSoftmaxWorkload::ClSoftmaxWorkload(const SoftmaxQueueDescriptor& descriptor,
 
 void ClSoftmaxWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSoftmaxWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSoftmaxWorkload_Execute", this->GetGuid());
     RunClFunction(m_SoftmaxLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
index 3aa8ebd2a8..70166192e5 100644
--- a/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToBatchNdWorkload.cpp
@@ -50,6 +50,12 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
     const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToBatchNdWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClSpaceToBatchNdWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  =
@@ -81,7 +87,7 @@ ClSpaceToBatchNdWorkload::ClSpaceToBatchNdWorkload(
 
 void ClSpaceToBatchNdWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToBatchNdWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToBatchNdWorkload_Execute", this->GetGuid());
     RunClFunction(m_SpaceToBatchLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
index 67487c4bf1..119605a02b 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.cpp
@@ -17,11 +17,16 @@ namespace armnn
 {
 using namespace armcomputetensorutils;
 
-ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
                                                const WorkloadInfo& info,
                                                const arm_compute::CLCompileContext& clCompileContext)
-    : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+    : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSpaceToDepthWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     m_Data.ValidateInputsOutputs("ClSpaceToDepthWorkload", 1, 1);
 
     arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -29,7 +34,7 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
     arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
     input.info()->set_data_layout(aclDataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
 
     arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
     output.info()->set_data_layout(aclDataLayout);
@@ -39,18 +44,18 @@ ClSpaceToDepthWorkload::ClSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor
 
 void ClSpaceToDepthWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSpaceToDepthWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSpaceToDepthWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
 arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const SpaceToDepthDescriptor& desc)
+                                                   const SpaceToDepthDescriptor& descriptor)
 {
-    DataLayout dataLayout = desc.m_DataLayout;
+    DataLayout dataLayout = descriptor.m_DataLayout;
     const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, dataLayout);
 
-    int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_BlockSize);
+    int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_BlockSize);
 
     const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, dataLayout);
 
diff --git a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
index b782bbe24d..3674bda1b6 100644
--- a/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
+++ b/src/backends/cl/workloads/ClSpaceToDepthWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
 {
 arm_compute::Status ClSpaceToDepthWorkloadValidate(const TensorInfo& input,
                                                    const TensorInfo& output,
-                                                   const SpaceToDepthDescriptor& desc);
+                                                   const SpaceToDepthDescriptor& descriptor);
 
 class ClSpaceToDepthWorkload : public BaseWorkload<SpaceToDepthQueueDescriptor>
 {
diff --git a/src/backends/cl/workloads/ClSplitterWorkload.cpp b/src/backends/cl/workloads/ClSplitterWorkload.cpp
index 8eb58c967e..b1ab17d6d2 100644
--- a/src/backends/cl/workloads/ClSplitterWorkload.cpp
+++ b/src/backends/cl/workloads/ClSplitterWorkload.cpp
@@ -57,6 +57,11 @@ ClSplitterWorkload::ClSplitterWorkload(const SplitterQueueDescriptor& descriptor
     const arm_compute::CLCompileContext&)
     : BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClSplitterWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
     bool allOutputsAreSubtensors = true;
 
     // Check that all outputs are sub-tensors
@@ -109,7 +114,7 @@ void ClSplitterWorkload::Execute() const
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClSplitterWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSplitterWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
 
diff --git a/src/backends/cl/workloads/ClStackWorkload.cpp b/src/backends/cl/workloads/ClStackWorkload.cpp
index 38c76eb648..5070356dee 100644
--- a/src/backends/cl/workloads/ClStackWorkload.cpp
+++ b/src/backends/cl/workloads/ClStackWorkload.cpp
@@ -49,6 +49,12 @@ ClStackWorkload::ClStackWorkload(const StackQueueDescriptor& descriptor,
                                  const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<StackQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStackWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     std::vector<arm_compute::ICLTensor*> aclInputs;
     for (auto input : m_Data.m_Inputs)
     {
@@ -67,7 +73,7 @@ void ClStackWorkload::Execute() const
 {
     if (m_Layer)
     {
-        ARMNN_SCOPED_PROFILING_EVENT_CL("ClStackWorkload_Execute");
+        ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStackWorkload_Execute", this->GetGuid());
         m_Layer->run();
     }
 }
 
diff --git a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
index adf32ce1fc..51a77c54ad 100644
--- a/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
+++ b/src/backends/cl/workloads/ClStridedSliceWorkload.cpp
@@ -57,6 +57,12 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
                                                const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClStridedSliceWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs("ClStridedSliceWorkload", 1, 1);
 
     arm_compute::ICLTensor& input  = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -92,7 +98,7 @@ ClStridedSliceWorkload::ClStridedSliceWorkload(const StridedSliceQueueDescriptor
 
 void ClStridedSliceWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClStridedSliceWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClStridedSliceWorkload_Execute", this->GetGuid());
     RunClFunction(m_StridedSliceLayer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClSubtractionWorkload.cpp b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
index e320fec342..6465e3e050 100644
--- a/src/backends/cl/workloads/ClSubtractionWorkload.cpp
+++ b/src/backends/cl/workloads/ClSubtractionWorkload.cpp
@@ -36,7 +36,7 @@ ClSubtractionWorkload::ClSubtractionWorkload(const SubtractionQueueDescriptor& d
 
 void ClSubtractionWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClSubtractionWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClSubtractionWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
index b40b4b10ca..0b5c7c628b 100644
--- a/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeConvolution2dWorkload.cpp
@@ -61,6 +61,23 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
     BaseWorkload<TransposeConvolution2dQueueDescriptor>(descriptor, info),
     m_Layer(memoryManager)
 {
+    // Add details for profiling output
+    WorkloadInfo detailsInfo;
+
+    detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+    detailsInfo.m_WeightsTensorInfo = armnn::Optional<TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+    if (descriptor.m_Parameters.m_BiasEnabled)
+    {
+        detailsInfo.m_BiasTensorInfo = armnn::Optional<TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+    }
+
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeConvolutionWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         detailsInfo,
+                                         this->GetGuid());
+
     const TensorInfo& weightInfo = m_Data.m_Weight->GetTensorInfo();
 
     m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
@@ -98,7 +115,7 @@ ClTransposeConvolution2dWorkload::ClTransposeConvolution2dWorkload(
 
 void ClTransposeConvolution2dWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL("ClTransposeConvolution2dWorkload_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClTransposeConvolution2dWorkload_Execute", this->GetGuid());
     RunClFunction(m_Layer, CHECK_LOCATION());
 }
 
diff --git a/src/backends/cl/workloads/ClTransposeWorkload.cpp b/src/backends/cl/workloads/ClTransposeWorkload.cpp
index 7ef502eb8d..d80eae87ea 100644
--- a/src/backends/cl/workloads/ClTransposeWorkload.cpp
+++ b/src/backends/cl/workloads/ClTransposeWorkload.cpp
@@ -31,6 +31,12 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescrip
                                          const arm_compute::CLCompileContext& clCompileContext)
     : BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
 {
+    // Report Profiling Details
+    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClTransposeWorkload_Construct",
+                                         descriptor.m_Parameters,
+                                         info,
+                                         this->GetGuid());
+
     m_Data.ValidateInputsOutputs(GetName(), 1, 1);
 
     const arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -45,7 +51,7 @@ ClTransposeWorkload::ClTransposeWorkload(const TransposeQueueDescrip
 
 void ClTransposeWorkload::Execute() const
 {
-    ARMNN_SCOPED_PROFILING_EVENT_CL(GetName() + "_Execute");
+    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID(GetName() + "_Execute", this->GetGuid());
     RunClFunction(m_PermuteFunction, CHECK_LOCATION());
 }
-- 
cgit v1.2.1
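
Note for reviewers: the pattern applied to each workload above is summarised below. This is an illustrative sketch only, not part of the patch; ClExampleWorkload and ExampleQueueDescriptor are hypothetical stand-ins.

// Hypothetical workload showing the two profiling hooks this patch adds.
ClExampleWorkload::ClExampleWorkload(const ExampleQueueDescriptor& descriptor,
                                     const WorkloadInfo& info)
    : BaseWorkload<ExampleQueueDescriptor>(descriptor, info)
{
    // Construct time: report the descriptor parameters and tensor infos
    // against this workload's GUID so the profiler can label the workload.
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClExampleWorkload_Construct",
                                         descriptor.m_Parameters,
                                         info,
                                         this->GetGuid());
}

void ClExampleWorkload::Execute() const
{
    // Execute time: the scoped event carries the same GUID, so per-run
    // timings can be matched back to the construct-time details.
    ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClExampleWorkload_Execute", this->GetGuid());
    RunClFunction(m_Layer, CHECK_LOCATION());
}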