From 2d0679f33f75c43e7169fe0f0ee2d15d0620e091 Mon Sep 17 00:00:00 2001 From: Keith Davis Date: Thu, 5 Aug 2021 11:35:00 +0100 Subject: IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in Neon Signed-off-by: Keith Davis Change-Id: I7be77712a9f790928219ce91222d46cc766ab9dd --- src/backends/neon/workloads/NeonAbsWorkload.cpp | 2 +- .../neon/workloads/NeonActivationWorkload.cpp | 8 ++++- .../neon/workloads/NeonAdditionWorkload.cpp | 2 +- .../neon/workloads/NeonArgMinMaxWorkload.cpp | 8 ++++- .../workloads/NeonBatchNormalizationWorkload.cpp | 8 ++++- .../neon/workloads/NeonBatchToSpaceNdWorkload.cpp | 26 ++++++++------ src/backends/neon/workloads/NeonCastWorkload.cpp | 2 +- .../neon/workloads/NeonComparisonWorkload.cpp | 8 ++++- src/backends/neon/workloads/NeonConcatWorkload.cpp | 12 +++++-- .../neon/workloads/NeonConstantWorkload.cpp | 2 +- .../workloads/NeonConvertBf16ToFp32Workload.cpp | 2 +- .../workloads/NeonConvertFp16ToFp32Workload.cpp | 2 +- .../workloads/NeonConvertFp32ToBf16Workload.cpp | 2 +- .../workloads/NeonConvertFp32ToFp16Workload.cpp | 2 +- .../neon/workloads/NeonConvolution2dWorkload.cpp | 2 +- .../neon/workloads/NeonDepthToSpaceWorkload.cpp | 14 +++++--- .../workloads/NeonDepthwiseConvolutionWorkload.cpp | 41 +++++++++++++++------- .../neon/workloads/NeonDequantizeWorkload.cpp | 2 +- .../workloads/NeonDetectionPostProcessWorkload.cpp | 36 +++++++++++-------- .../workloads/NeonDetectionPostProcessWorkload.hpp | 2 +- .../neon/workloads/NeonDivisionWorkload.cpp | 2 +- src/backends/neon/workloads/NeonExpWorkload.cpp | 8 ++++- src/backends/neon/workloads/NeonFillWorkload.cpp | 8 ++++- .../neon/workloads/NeonFloorFloatWorkload.cpp | 2 +- .../neon/workloads/NeonFullyConnectedWorkload.cpp | 31 ++++++++++++---- src/backends/neon/workloads/NeonGatherWorkload.cpp | 8 ++++- .../NeonInstanceNormalizationWorkload.cpp | 8 ++++- .../workloads/NeonL2NormalizationFloatWorkload.cpp | 8 ++++- .../neon/workloads/NeonLogSoftmaxWorkload.cpp | 8 ++++- 
src/backends/neon/workloads/NeonLogWorkload.cpp | 2 +- .../neon/workloads/NeonLogicalAndWorkload.cpp | 8 ++++- .../neon/workloads/NeonLogicalNotWorkload.cpp | 8 ++++- .../neon/workloads/NeonLogicalOrWorkload.cpp | 10 ++++-- .../neon/workloads/NeonLstmFloatWorkload.cpp | 7 ++++ .../neon/workloads/NeonMaximumWorkload.cpp | 2 +- src/backends/neon/workloads/NeonMeanWorkload.cpp | 14 +++++--- src/backends/neon/workloads/NeonMeanWorkload.hpp | 2 +- .../neon/workloads/NeonMinimumWorkload.cpp | 2 +- .../neon/workloads/NeonMultiplicationWorkload.cpp | 2 +- src/backends/neon/workloads/NeonNegWorkload.cpp | 2 +- .../workloads/NeonNormalizationFloatWorkload.cpp | 13 +++++-- src/backends/neon/workloads/NeonPadWorkload.cpp | 8 ++++- .../neon/workloads/NeonPermuteWorkload.cpp | 8 ++++- .../neon/workloads/NeonPooling2dWorkload.cpp | 8 ++++- src/backends/neon/workloads/NeonPreluWorkload.cpp | 2 +- src/backends/neon/workloads/NeonQLstmWorkload.cpp | 7 ++++ .../neon/workloads/NeonQuantizeWorkload.cpp | 2 +- .../neon/workloads/NeonQuantizedLstmWorkload.cpp | 1 + src/backends/neon/workloads/NeonReduceWorkload.cpp | 20 +++++++---- src/backends/neon/workloads/NeonReduceWorkload.hpp | 2 +- .../neon/workloads/NeonReshapeWorkload.cpp | 2 +- src/backends/neon/workloads/NeonResizeWorkload.cpp | 8 ++++- src/backends/neon/workloads/NeonRsqrtWorkload.cpp | 2 +- src/backends/neon/workloads/NeonSinWorkload.cpp | 2 +- src/backends/neon/workloads/NeonSliceWorkload.cpp | 9 ++++- .../neon/workloads/NeonSoftmaxWorkload.cpp | 8 ++++- .../neon/workloads/NeonSpaceToBatchNdWorkload.cpp | 12 +++++-- .../neon/workloads/NeonSpaceToDepthWorkload.cpp | 14 +++++--- .../neon/workloads/NeonSplitterWorkload.cpp | 8 ++++- src/backends/neon/workloads/NeonStackWorkload.cpp | 8 ++++- .../neon/workloads/NeonStridedSliceWorkload.cpp | 8 ++++- .../neon/workloads/NeonSubtractionWorkload.cpp | 2 +- .../NeonTransposeConvolution2dWorkload.cpp | 19 +++++++++- .../neon/workloads/NeonTransposeWorkload.cpp | 8 ++++- 64 files 
changed, 384 insertions(+), 122 deletions(-) diff --git a/src/backends/neon/workloads/NeonAbsWorkload.cpp b/src/backends/neon/workloads/NeonAbsWorkload.cpp index ea14ac3897..bd476be1ae 100644 --- a/src/backends/neon/workloads/NeonAbsWorkload.cpp +++ b/src/backends/neon/workloads/NeonAbsWorkload.cpp @@ -35,7 +35,7 @@ NeonAbsWorkload::NeonAbsWorkload(const AbsQueueDescriptor& descriptor, const Wor void NeonAbsWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAbsWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAbsWorkload_Execute", this->GetGuid()); m_AbsLayer.run(); } diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp index 4b2169a6ee..dd4c97d76b 100644 --- a/src/backends/neon/workloads/NeonActivationWorkload.cpp +++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp @@ -33,6 +33,12 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor& const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonActivationWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonActivationWorkload", 1, 1); const arm_compute::ActivationLayerInfo activationLayerInfo = @@ -49,7 +55,7 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor& void NeonActivationWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonActivationWorkload_Execute", this->GetGuid()); m_ActivationLayer->run(); } diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp index 5891677c0d..dfbb992e05 100644 --- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp +++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp @@ -56,7 +56,7 @@ 
NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri void NeonAdditionWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAdditionWorkload_Execute", this->GetGuid()); m_AddLayer->run(); } diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp index cc85791ae6..7e9d2c7ec6 100644 --- a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp +++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp @@ -56,6 +56,12 @@ NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& des const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonArgMinMaxWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ITensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); @@ -79,7 +85,7 @@ NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& des void NeonArgMinMaxWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonArgMinMaxWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonArgMinMaxWorkload_Execute", this->GetGuid()); m_ArgMinMaxLayer->run(); } diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp index 5da7cca83e..3d0a90bc7d 100644 --- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp +++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp @@ -60,6 +60,12 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + 
ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchNormalizationWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonBatchNormalizationWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -107,7 +113,7 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload( void NeonBatchNormalizationWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchNormalizationWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp index 3d479ff80d..2a35475541 100644 --- a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp +++ b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp @@ -19,14 +19,14 @@ using namespace armcomputetensorutils; arm_compute::Status NeonBatchToSpaceNdWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const BatchToSpaceNdDescriptor& desc) + const BatchToSpaceNdDescriptor& descriptor) { - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, desc.m_DataLayout); - const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, desc.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = armnn::numeric_cast(desc.m_BlockShape[0]); - int32_t blockWidth = armnn::numeric_cast(desc.m_BlockShape[1]); + int32_t blockHeight = armnn::numeric_cast(descriptor.m_BlockShape[0]); + int32_t blockWidth = armnn::numeric_cast(descriptor.m_BlockShape[1]); const arm_compute::Status aclStatus = 
arm_compute::NEBatchToSpaceLayer::validate(&aclInputInfo, blockWidth, @@ -35,10 +35,16 @@ arm_compute::Status NeonBatchToSpaceNdWorkloadValidate(const TensorInfo& input, return aclStatus; } -NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc, +NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload(desc, info) + : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchToSpaceNdWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonBatchToSpaceNdWorkload", 1, 1); arm_compute::ITensor& input = @@ -51,8 +57,8 @@ NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueue output.info()->set_data_layout(aclDataLayout); // ArmNN blockShape is [H, W] Cl asks for W, H - int32_t blockHeight = armnn::numeric_cast(desc.m_Parameters.m_BlockShape[0]); - int32_t blockWidth = armnn::numeric_cast(desc.m_Parameters.m_BlockShape[1]); + int32_t blockHeight = armnn::numeric_cast(descriptor.m_Parameters.m_BlockShape[0]); + int32_t blockWidth = armnn::numeric_cast(descriptor.m_Parameters.m_BlockShape[1]); m_Layer.reset(new arm_compute::NEBatchToSpaceLayer()); m_Layer->configure(&input, blockWidth, blockHeight, &output); @@ -63,7 +69,7 @@ void NeonBatchToSpaceNdWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchToSpaceNdWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonCastWorkload.cpp b/src/backends/neon/workloads/NeonCastWorkload.cpp index 4727fe127e..50e212e1bc 100644 --- a/src/backends/neon/workloads/NeonCastWorkload.cpp +++ b/src/backends/neon/workloads/NeonCastWorkload.cpp @@ -37,7 +37,7 @@ NeonCastWorkload::NeonCastWorkload(const
CastQueueDescriptor& descriptor, const void NeonCastWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonCastWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonCastWorkload_Execute", this->GetGuid()); m_CastLayer.run(); } diff --git a/src/backends/neon/workloads/NeonComparisonWorkload.cpp b/src/backends/neon/workloads/NeonComparisonWorkload.cpp index 01a6a0c78b..129921abe2 100644 --- a/src/backends/neon/workloads/NeonComparisonWorkload.cpp +++ b/src/backends/neon/workloads/NeonComparisonWorkload.cpp @@ -34,6 +34,12 @@ arm_compute::Status NeonComparisonWorkloadValidate(const TensorInfo& input0, NeonComparisonWorkload::NeonComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonComparisonWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonComparisonWorkload", 2, 1); arm_compute::ITensor& input0 = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -47,7 +53,7 @@ NeonComparisonWorkload::NeonComparisonWorkload(const ComparisonQueueDescriptor& void NeonComparisonWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonComparisonWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonComparisonWorkload_Execute", this->GetGuid()); m_ComparisonLayer.run(); } diff --git a/src/backends/neon/workloads/NeonConcatWorkload.cpp b/src/backends/neon/workloads/NeonConcatWorkload.cpp index 5cd906da41..657a9402ef 100644 --- a/src/backends/neon/workloads/NeonConcatWorkload.cpp +++ b/src/backends/neon/workloads/NeonConcatWorkload.cpp @@ -18,9 +18,9 @@ using namespace armcomputetensorutils; namespace { -size_t CalcAxis(const armnn::OriginsDescriptor& desc) +size_t CalcAxis(const armnn::OriginsDescriptor& descriptor) { - return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1; + return (descriptor.GetNumDimensions() - 
descriptor.GetConcatAxis()) - 1; } } //namespace @@ -50,6 +50,12 @@ NeonConcatWorkload::NeonConcatWorkload( const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConcatWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + bool allInputsAreSubtensors = true; // Check that all inputs are sub-tensors @@ -93,7 +99,7 @@ void NeonConcatWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConcatWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConcatWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonConstantWorkload.cpp b/src/backends/neon/workloads/NeonConstantWorkload.cpp index 77e4420794..16bb211816 100644 --- a/src/backends/neon/workloads/NeonConstantWorkload.cpp +++ b/src/backends/neon/workloads/NeonConstantWorkload.cpp @@ -53,7 +53,7 @@ NeonConstantWorkload::NeonConstantWorkload(const ConstantQueueDescriptor& descri void NeonConstantWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConstantWorkload_Execute", this->GetGuid()); using namespace armcomputetensorutils; diff --git a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp index 79d1f22313..e8cc1254e5 100644 --- a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp @@ -24,7 +24,7 @@ NeonConvertBf16ToFp32Workload::NeonConvertBf16ToFp32Workload(const ConvertBf16To void NeonConvertBf16ToFp32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertBf16ToFp32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertBf16ToFp32Workload_Execute", this->GetGuid()); auto convertFunc = [](uint8_t* dst, const 
uint8_t* src, size_t size) { diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp index 01f09a56f6..0d6bb047f9 100644 --- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp @@ -24,7 +24,7 @@ NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16To void NeonConvertFp16ToFp32Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp16ToFp32Workload_Execute", this->GetGuid()); auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) { diff --git a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp index e1aceec197..84d3c78b49 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp @@ -25,7 +25,7 @@ NeonConvertFp32ToBf16Workload::NeonConvertFp32ToBf16Workload(const ConvertFp32To void NeonConvertFp32ToBf16Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToBf16Workload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToBf16Workload_Execute", this->GetGuid()); auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) { diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp index 62f39be467..7f6d4d6215 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp @@ -25,7 +25,7 @@ NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32To void NeonConvertFp32ToFp16Workload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute"); + 
ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToFp16Workload_Execute", this->GetGuid()); auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size) { diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp index a6ae99b481..0b0a72cb46 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp @@ -131,7 +131,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload( } // Report Profiling Details - ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Execute", + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Construct", descriptor.m_Parameters, detailsInfo, this->GetGuid()); diff --git a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp index 2c4a6517e7..76829f376c 100644 --- a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp @@ -29,10 +29,16 @@ arm_compute::Status NeonDepthToSpaceWorkloadValidate(const TensorInfo& input, return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize); } -NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc, +NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload(desc, info) + : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthToSpaceWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonDepthToSpaceWorkload", 1, 1); arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); @@ -41,7 +47,7 @@ NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescri 
PolymorphicPointerDowncast(m_Data.m_Inputs[0])->GetTensor(); input.info()->set_data_layout(aclDataLayout); - int32_t blockSize = armnn::numeric_cast(desc.m_Parameters.m_BlockSize); + int32_t blockSize = armnn::numeric_cast(descriptor.m_Parameters.m_BlockSize); arm_compute::ITensor& output = PolymorphicPointerDowncast(m_Data.m_Outputs[0])->GetTensor(); @@ -53,7 +59,7 @@ NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescri void NeonDepthToSpaceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthToSpaceWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthToSpaceWorkload_Execute", this->GetGuid()); m_Layer.run(); } diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp index 589a951825..138c237aba 100644 --- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp @@ -33,20 +33,20 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i const Optional& biases, const ActivationDescriptor* activationDescriptor) { - const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); + const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout); const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout); // ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M] // Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library unsigned int aclDepthMultiplier; TensorInfo weightsPermuted; - std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout); + std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout); // Convert the weights into 
the compute library format const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout); arm_compute::TensorInfo aclBiasesInfo; - arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr; + arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr; if (descriptor.m_BiasEnabled) { @@ -58,10 +58,10 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor); const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D( - descriptor.m_DilationX,descriptor.m_DilationY); + descriptor.m_DilationX, descriptor.m_DilationY); const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo( - activationDescriptor); + activationDescriptor); return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo, &aclWeightsInfo, @@ -85,9 +85,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( unsigned int depthMultiplier; std::unique_ptr permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]); std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight, - info.m_InputTensorInfos[0], - m_Data.m_Parameters.m_DataLayout, - permuteBuffer.get()); + info.m_InputTensorInfos[0], + m_Data.m_Parameters.m_DataLayout, + permuteBuffer.get()); // Convert the weights into the compute library format m_KernelTensor = std::make_unique(); @@ -100,14 +100,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( } const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D( - m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY); + m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY); m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1); - IAclTensorHandle* inputTensorHandle = static_cast(m_Data.m_Inputs[0]); + IAclTensorHandle* inputTensorHandle = 
static_cast(m_Data.m_Inputs[0]); IAclTensorHandle* outputTensorHandle = static_cast(m_Data.m_Outputs[0]); - arm_compute::ITensor& input = inputTensorHandle->GetTensor(); + arm_compute::ITensor& input = inputTensorHandle->GetTensor(); arm_compute::ITensor& output = outputTensorHandle->GetTensor(); arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); @@ -129,6 +129,23 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( activationInfo, aclDilationInfo); + // Add details for profiling output + WorkloadInfo detailsInfo; + + detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos; + detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos; + detailsInfo.m_WeightsTensorInfo = armnn::Optional(descriptor.m_Weight->GetTensorInfo()); + if (descriptor.m_Parameters.m_BiasEnabled) + { + detailsInfo.m_BiasTensorInfo = armnn::Optional(descriptor.m_Bias->GetTensorInfo()); + } + + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthwiseConvolutionWorkload_Construct", + descriptor.m_Parameters, + detailsInfo, + this->GetGuid()); + ARMNN_ASSERT(m_pDepthwiseConvolutionLayer); ScopedTensorHandle weightsPermutedHandle(weightsPermuted); @@ -145,7 +162,7 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload( void NeonDepthwiseConvolutionWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthwiseConvolutionWorkload_Execute", this->GetGuid()); ARMNN_ASSERT(m_pDepthwiseConvolutionLayer); m_pDepthwiseConvolutionLayer->run(); diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp index 07323d19ca..32c1134c35 100644 --- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp +++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp @@ -44,7 +44,7 @@ NeonDequantizeWorkload::NeonDequantizeWorkload(const
DequantizeQueueDescriptor& void NeonDequantizeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDequantizeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDequantizeWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp index 36f1cd98de..a9cb5c40be 100644 --- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp +++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp @@ -14,19 +14,19 @@ namespace armnn { -arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& desc) +arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& descriptor) { - return arm_compute::DetectionPostProcessLayerInfo(desc.m_MaxDetections, - desc.m_MaxClassesPerDetection, - desc.m_NmsScoreThreshold, - desc.m_NmsIouThreshold, - desc.m_NumClasses, - { desc.m_ScaleX, - desc.m_ScaleY, - desc.m_ScaleW, - desc.m_ScaleH }, - desc.m_UseRegularNms, - desc.m_DetectionsPerClass); + return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections, + descriptor.m_MaxClassesPerDetection, + descriptor.m_NmsScoreThreshold, + descriptor.m_NmsIouThreshold, + descriptor.m_NumClasses, + { descriptor.m_ScaleX, + descriptor.m_ScaleY, + descriptor.m_ScaleW, + descriptor.m_ScaleH }, + descriptor.m_UseRegularNms, + descriptor.m_DetectionsPerClass); } arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings, @@ -36,9 +36,9 @@ arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodi const TensorInfo& detectionClasses, const TensorInfo& detectionScores, const TensorInfo& numDetections, - const DetectionPostProcessDescriptor &desc) + const DetectionPostProcessDescriptor &descriptor) { - arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(desc); + 
arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor); const arm_compute::TensorInfo aclBoxEncodings = armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings); @@ -77,6 +77,12 @@ NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload( const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDetectionPostProcessWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Anchors = std::make_unique(); BuildArmComputeTensor(*m_Anchors, descriptor.m_Anchors->GetTensorInfo()); @@ -104,7 +110,7 @@ NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload( void NeonDetectionPostProcessWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDetectionPostProcessWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDetectionPostProcessWorkload_Execute", this->GetGuid()); m_Func.run(); } diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp index 29876ff795..82ef1e2f19 100644 --- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp +++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp @@ -20,7 +20,7 @@ arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodi const TensorInfo& detectionClasses, const TensorInfo& detectionScores, const TensorInfo& numDetections, - const DetectionPostProcessDescriptor &desc); + const DetectionPostProcessDescriptor &descriptor); class NeonDetectionPostProcessWorkload : public BaseWorkload { diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp index fa61a100a9..8c5d2b80f6 100644 --- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp +++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp @@ -50,7 +50,7 @@ NeonDivisionWorkload::NeonDivisionWorkload(const 
DivisionQueueDescriptor& descri void NeonDivisionWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDivisionWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDivisionWorkload_Execute", this->GetGuid()); m_DivLayer.run(); } diff --git a/src/backends/neon/workloads/NeonExpWorkload.cpp b/src/backends/neon/workloads/NeonExpWorkload.cpp index 7baaa84547..aff8e72a4e 100644 --- a/src/backends/neon/workloads/NeonExpWorkload.cpp +++ b/src/backends/neon/workloads/NeonExpWorkload.cpp @@ -25,6 +25,12 @@ arm_compute::Status NeonExpWorkloadValidate(const TensorInfo& input, const Tenso NeonExpWorkload::NeonExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonExpWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonExpWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -35,7 +41,7 @@ NeonExpWorkload::NeonExpWorkload(const ElementwiseUnaryQueueDescriptor& descript void NeonExpWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonExpWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonExpWorkload_Execute", this->GetGuid()); m_ExpLayer.run(); } diff --git a/src/backends/neon/workloads/NeonFillWorkload.cpp b/src/backends/neon/workloads/NeonFillWorkload.cpp index 5965d2000c..0a3c7f0c88 100644 --- a/src/backends/neon/workloads/NeonFillWorkload.cpp +++ b/src/backends/neon/workloads/NeonFillWorkload.cpp @@ -19,6 +19,12 @@ using namespace armcomputetensorutils; NeonFillWorkload::NeonFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFillWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + 
m_Data.ValidateInputsOutputs("NeonFillWorkload", 1, 1); arm_compute::ITensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); @@ -31,7 +37,7 @@ NeonFillWorkload::NeonFillWorkload(const FillQueueDescriptor& descriptor, const void NeonFillWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFillWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFillWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp index c49df33a54..d728e00ea6 100644 --- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp @@ -29,7 +29,7 @@ NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descr void NeonFloorFloatWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFloorFloatWorkload_Execute", this->GetGuid()); m_Layer->run(); } } //namespace armnn diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp index 713771be91..94dc07704d 100644 --- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp +++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp @@ -19,6 +19,7 @@ namespace armnn { using namespace armcomputetensorutils; +using ACLMemManagerOnDemand = std::shared_ptr; arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, const TensorInfo& output, @@ -32,10 +33,10 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights); arm_compute::TensorInfo aclBiases; - arm_compute::TensorInfo *optionalAclBiases = nullptr; + arm_compute::TensorInfo* optionalAclBiases = nullptr; if (descriptor.m_BiasEnabled) { - aclBiases = 
BuildArmComputeTensorInfo(biases); + aclBiases = BuildArmComputeTensorInfo(biases); optionalAclBiases = &aclBiases; } @@ -50,7 +51,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input, } NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) + const WorkloadInfo& info, + ACLMemManagerOnDemand& memoryManager) : BaseWorkload(descriptor, info) { m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1); @@ -69,8 +71,8 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor); - arm_compute::FullyConnectedLayerInfo fc_info = - ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo); + arm_compute::FullyConnectedLayerInfo fc_info = + ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo); auto layer = std::make_unique(memoryManager); layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info); @@ -98,6 +100,23 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue } } + // Add details for profiling output + WorkloadInfo detailsInfo; + + detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos; + detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos; + detailsInfo.m_WeightsTensorInfo = armnn::Optional(descriptor.m_Weight->GetTensorInfo()); + if (descriptor.m_Parameters.m_BiasEnabled) + { + detailsInfo.m_BiasTensorInfo = armnn::Optional(descriptor.m_Bias->GetTensorInfo()); + } + + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFullyConnectedWorkload_Construct", + descriptor.m_Parameters, + detailsInfo, + this->GetGuid()); + // Force Compute Library to perform the necessary copying and reshaping, after which // 
delete all the input tensors that will no longer be needed m_FullyConnectedLayer->prepare(); @@ -106,7 +125,7 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue void NeonFullyConnectedWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFullyConnectedWorkload_Execute", this->GetGuid()); m_FullyConnectedLayer->run(); } diff --git a/src/backends/neon/workloads/NeonGatherWorkload.cpp b/src/backends/neon/workloads/NeonGatherWorkload.cpp index 2c94cb5184..f5c8d34235 100644 --- a/src/backends/neon/workloads/NeonGatherWorkload.cpp +++ b/src/backends/neon/workloads/NeonGatherWorkload.cpp @@ -28,6 +28,12 @@ NeonGatherWorkload::NeonGatherWorkload(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonGatherWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonGatherWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -41,7 +47,7 @@ NeonGatherWorkload::NeonGatherWorkload(const GatherQueueDescriptor& descriptor, void NeonGatherWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonGatherWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherWorkload_Execute", this->GetGuid()); m_Layer.run(); } } //namespace armnn \ No newline at end of file diff --git a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp index 1bfd1e4d47..a68ea65602 100644 --- a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp +++ b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp @@ -35,6 +35,12 @@ NeonInstanceNormalizationWorkload::NeonInstanceNormalizationWorkload( const WorkloadInfo& info) : 
BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonInstanceNormalizationWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonInstanceNormalizationWorkload", 1, 1); arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -53,7 +59,7 @@ NeonInstanceNormalizationWorkload::NeonInstanceNormalizationWorkload( void NeonInstanceNormalizationWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonInstanceNormalizationWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonInstanceNormalizationWorkload_Execute", this->GetGuid()); m_Layer.run(); } diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp index d54607d31e..33b460918c 100644 --- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp @@ -32,6 +32,12 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma const WorkloadInfo& info, std::shared_ptr& memoryManager) : FloatWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonL2NormalizationFloatWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -50,7 +56,7 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma void NeonL2NormalizationFloatWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonL2NormalizationFloatWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp 
b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp index ba5c9000f4..8a9743298b 100644 --- a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp +++ b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp @@ -35,6 +35,12 @@ NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& std::shared_ptr& memoryManager) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogSoftmaxWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonLogSoftmaxWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -48,7 +54,7 @@ NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor& void NeonLogSoftmaxWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogSoftmaxWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogSoftmaxWorkload_Execute", this->GetGuid()); m_LogSoftmaxLayer->run(); } diff --git a/src/backends/neon/workloads/NeonLogWorkload.cpp b/src/backends/neon/workloads/NeonLogWorkload.cpp index 460f5b3b09..0fb8f8aa62 100644 --- a/src/backends/neon/workloads/NeonLogWorkload.cpp +++ b/src/backends/neon/workloads/NeonLogWorkload.cpp @@ -35,7 +35,7 @@ NeonLogWorkload::NeonLogWorkload(const ElementwiseUnaryQueueDescriptor& descript void NeonLogWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogWorkload_Execute", this->GetGuid()); m_LogLayer.run(); } diff --git a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp index d85e05cfe8..179e495292 100644 --- a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp +++ b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp @@ -33,6 +33,12 @@ NeonLogicalAndWorkload::NeonLogicalAndWorkload(const LogicalBinaryQueueDescripto const WorkloadInfo& info) 
: BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalAndWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonLogicalAndWorkload", 2, 1); arm_compute::ITensor& input0 = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -44,7 +50,7 @@ NeonLogicalAndWorkload::NeonLogicalAndWorkload(const LogicalBinaryQueueDescripto void NeonLogicalAndWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalAndWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalAndWorkload_Execute", this->GetGuid()); m_LogicalAndLayer.run(); } diff --git a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp index cff5eaf2ba..16bf4e855d 100644 --- a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp +++ b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp @@ -31,6 +31,12 @@ NeonLogicalNotWorkload::NeonLogicalNotWorkload(const ElementwiseUnaryQueueDescri const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalNotWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonLogicalNotWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -41,7 +47,7 @@ NeonLogicalNotWorkload::NeonLogicalNotWorkload(const ElementwiseUnaryQueueDescri void NeonLogicalNotWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalNotWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalNotWorkload_Execute", this->GetGuid()); m_LogicalNotLayer.run(); } diff --git a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp index c3f21e149d..301f432673 100644 --- a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp +++ 
b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp @@ -30,9 +30,15 @@ arm_compute::Status NeonLogicalOrWorkloadValidate(const TensorInfo& input0, } NeonLogicalOrWorkload::NeonLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor, - const WorkloadInfo& info) + const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalOrWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonLogicalOrWorkload", 2, 1); arm_compute::ITensor& input0 = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -44,7 +50,7 @@ NeonLogicalOrWorkload::NeonLogicalOrWorkload(const LogicalBinaryQueueDescriptor& void NeonLogicalOrWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalOrWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalOrWorkload_Execute", this->GetGuid()); m_LogicalOrLayer.run(); } diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp index 175e908817..f80da03ba1 100644 --- a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp @@ -19,6 +19,12 @@ using namespace armcomputetensorutils; NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info) : FloatWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLstmFloatWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + arm_compute::LSTMParams lstm_param; // Basic parameters @@ -267,6 +273,7 @@ NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor &descript void NeonLstmFloatWorkload::Execute() const { + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLstmFloatWorkload_Execute", this->GetGuid()); m_LstmLayer.run(); } diff --git 
a/src/backends/neon/workloads/NeonMaximumWorkload.cpp b/src/backends/neon/workloads/NeonMaximumWorkload.cpp index c4500d885a..0f95af5f98 100644 --- a/src/backends/neon/workloads/NeonMaximumWorkload.cpp +++ b/src/backends/neon/workloads/NeonMaximumWorkload.cpp @@ -39,7 +39,7 @@ NeonMaximumWorkload::NeonMaximumWorkload(const MaximumQueueDescriptor& descripto void NeonMaximumWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMaximumWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMaximumWorkload_Execute", this->GetGuid()); m_MaxLayer.run(); } diff --git a/src/backends/neon/workloads/NeonMeanWorkload.cpp b/src/backends/neon/workloads/NeonMeanWorkload.cpp index bb0870d9ef..5d8d1c43a1 100644 --- a/src/backends/neon/workloads/NeonMeanWorkload.cpp +++ b/src/backends/neon/workloads/NeonMeanWorkload.cpp @@ -17,21 +17,27 @@ using namespace armcomputetensorutils; arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const MeanDescriptor& desc) + const MeanDescriptor& descriptor) { const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), - desc.m_Axis); + descriptor.m_Axis); - return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo); + return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo); } NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonMeanWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonMeanWorkload", 1, 1); 
arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -46,7 +52,7 @@ NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const void NeonMeanWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMeanWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMeanWorkload_Execute", this->GetGuid()); m_Layer.run(); } diff --git a/src/backends/neon/workloads/NeonMeanWorkload.hpp b/src/backends/neon/workloads/NeonMeanWorkload.hpp index 055b52a011..5d16588da2 100644 --- a/src/backends/neon/workloads/NeonMeanWorkload.hpp +++ b/src/backends/neon/workloads/NeonMeanWorkload.hpp @@ -14,7 +14,7 @@ namespace armnn arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const MeanDescriptor& desc); + const MeanDescriptor& descriptor); class NeonMeanWorkload : public BaseWorkload { diff --git a/src/backends/neon/workloads/NeonMinimumWorkload.cpp b/src/backends/neon/workloads/NeonMinimumWorkload.cpp index 519b3c4bc6..5212947022 100644 --- a/src/backends/neon/workloads/NeonMinimumWorkload.cpp +++ b/src/backends/neon/workloads/NeonMinimumWorkload.cpp @@ -40,7 +40,7 @@ NeonMinimumWorkload::NeonMinimumWorkload(const MinimumQueueDescriptor& descripto void NeonMinimumWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMinimumWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMinimumWorkload_Execute", this->GetGuid()); m_MinLayer.run(); } diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp index e4ed195922..0ec550861f 100644 --- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp +++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp @@ -77,7 +77,7 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue void NeonMultiplicationWorkload::Execute() const { - 
ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMultiplicationWorkload_Execute", this->GetGuid()); m_PixelWiseMultiplication->run(); } diff --git a/src/backends/neon/workloads/NeonNegWorkload.cpp b/src/backends/neon/workloads/NeonNegWorkload.cpp index 06c146754c..e7705e64b4 100644 --- a/src/backends/neon/workloads/NeonNegWorkload.cpp +++ b/src/backends/neon/workloads/NeonNegWorkload.cpp @@ -35,7 +35,7 @@ NeonNegWorkload::NeonNegWorkload(const ElementwiseUnaryQueueDescriptor& descript void NeonNegWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNegWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNegWorkload_Execute", this->GetGuid()); m_NegLayer.run(); } diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp index 77fc429b95..92d499737e 100644 --- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp @@ -19,6 +19,7 @@ namespace armnn namespace { +using ACLMemManagerOnDemand = std::shared_ptr; bool IsNeonNormalizationDescriptorSupported(const NormalizationDescriptor& parameters, Optional reasonIfUnsupported) @@ -58,10 +59,16 @@ arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input, } NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, - const WorkloadInfo& info, - std::shared_ptr& memoryManager) + const WorkloadInfo& info, + ACLMemManagerOnDemand& memoryManager) : FloatWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonNormalizationWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1); std::string reasonIfUnsupported; if 
(!IsNeonNormalizationDescriptorSupported(m_Data.m_Parameters, Optional(reasonIfUnsupported))) @@ -99,7 +106,7 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati void NeonNormalizationFloatWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNormalizationFloatWorkload_Execute", this->GetGuid()); m_NormalizationLayer->run(); } diff --git a/src/backends/neon/workloads/NeonPadWorkload.cpp b/src/backends/neon/workloads/NeonPadWorkload.cpp index 19cdefc8ac..b378d5f843 100644 --- a/src/backends/neon/workloads/NeonPadWorkload.cpp +++ b/src/backends/neon/workloads/NeonPadWorkload.cpp @@ -19,6 +19,12 @@ using namespace armcomputetensorutils; NeonPadWorkload::NeonPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPadWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonPadWorkload", 1, 1); arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -41,7 +47,7 @@ NeonPadWorkload::NeonPadWorkload(const PadQueueDescriptor& descriptor, const Wor void NeonPadWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPadWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPadWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.cpp b/src/backends/neon/workloads/NeonPermuteWorkload.cpp index a5ecbcb2c0..9e18f7f032 100644 --- a/src/backends/neon/workloads/NeonPermuteWorkload.cpp +++ b/src/backends/neon/workloads/NeonPermuteWorkload.cpp @@ -28,6 +28,12 @@ NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descripto const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + 
ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPermuteWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + using armcomputetensorutils::BuildArmComputePermutationVector; m_Data.ValidateInputsOutputs(GetName(), 1, 1); @@ -42,7 +48,7 @@ NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descripto void NeonPermuteWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid()); m_PermuteFunction.run(); } diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp index 968d5ce02d..2115e93872 100644 --- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp @@ -37,6 +37,12 @@ NeonPooling2dWorkload::NeonPooling2dWorkload( const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPooling2dWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonPooling2dWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -55,7 +61,7 @@ NeonPooling2dWorkload::NeonPooling2dWorkload( void NeonPooling2dWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPooling2dWorkload_Execute", this->GetGuid()); m_PoolingLayer->run(); } diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp index 8e6ea301de..af03e7960d 100644 --- a/src/backends/neon/workloads/NeonPreluWorkload.cpp +++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp @@ -45,7 +45,7 @@ NeonPreluWorkload::NeonPreluWorkload(const PreluQueueDescriptor& descriptor, void NeonPreluWorkload::Execute() 
const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPreluWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPreluWorkload_Execute", this->GetGuid()); m_PreluLayer->run(); } diff --git a/src/backends/neon/workloads/NeonQLstmWorkload.cpp b/src/backends/neon/workloads/NeonQLstmWorkload.cpp index fd979d6533..c25262afa4 100644 --- a/src/backends/neon/workloads/NeonQLstmWorkload.cpp +++ b/src/backends/neon/workloads/NeonQLstmWorkload.cpp @@ -17,6 +17,12 @@ using namespace armcomputetensorutils; NeonQLstmWorkload::NeonQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonQLstmWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + arm_compute::LSTMParams qLstmParams; // Mandatory params @@ -230,6 +236,7 @@ NeonQLstmWorkload::NeonQLstmWorkload(const QLstmQueueDescriptor& descriptor, con void NeonQLstmWorkload::Execute() const { + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQLstmWorkload_Execute", this->GetGuid()); m_QLstmLayer.run(); } diff --git a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp index 14fbdf3dd9..f50ca81cc3 100644 --- a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp +++ b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp @@ -43,7 +43,7 @@ void NeonQuantizeWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonQuantizeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizeWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp index d809017692..e36fde4d89 100644 --- a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp +++ b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp @@ -124,6 +124,7 @@
NeonQuantizedLstmWorkload::NeonQuantizedLstmWorkload(const QuantizedLstmQueueDes void NeonQuantizedLstmWorkload::Execute() const { + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizedLstmWorkload_Execute", this->GetGuid()); m_QuantizedLstmLayer.run(); } diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp index 1436cd1192..bf7ce9892e 100644 --- a/src/backends/neon/workloads/NeonReduceWorkload.cpp +++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp @@ -18,28 +18,28 @@ using namespace armcomputetensorutils; arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const ReduceDescriptor& desc) + const ReduceDescriptor& descriptor) { - if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty()) + if ( descriptor.m_vAxis.size()==1 || descriptor.m_vAxis.empty()) { const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), - desc.m_vAxis); + descriptor.m_vAxis); return arm_compute::NEReductionOperation::validate(&aclInputInfo, &aclOutputInfo, static_cast(coords[0]), - ConvertReductionOperationToAcl(desc), - desc.m_KeepDims); + ConvertReductionOperationToAcl(descriptor), + descriptor.m_KeepDims); } else { // Validate layer if there are multiple axes. 
arm_compute::Status status; - IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, desc, status); + IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status); return status; } } @@ -47,6 +47,12 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonReduceWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1); arm_compute::ITensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); @@ -65,7 +71,7 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, void NeonReduceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReduceWorkload_Execute", this->GetGuid()); m_Layer.run(); } diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp index 0472091fbf..ddeac1267c 100644 --- a/src/backends/neon/workloads/NeonReduceWorkload.hpp +++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp @@ -14,7 +14,7 @@ namespace armnn arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, const TensorInfo& output, - const ReduceDescriptor& desc); + const ReduceDescriptor& descriptor); class NeonReduceWorkload : public BaseWorkload { diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp index 8b11da7253..7f2f225c23 100644 --- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp +++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp @@ -39,7 +39,7 @@ NeonReshapeWorkload::NeonReshapeWorkload(const ReshapeQueueDescriptor& descripto void NeonReshapeWorkload::Execute() const { - 
ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReshapeWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonResizeWorkload.cpp b/src/backends/neon/workloads/NeonResizeWorkload.cpp index ab01e30140..ecb43ae740 100644 --- a/src/backends/neon/workloads/NeonResizeWorkload.cpp +++ b/src/backends/neon/workloads/NeonResizeWorkload.cpp @@ -53,6 +53,12 @@ NeonResizeWorkload::NeonResizeWorkload(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonResizeWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonResizeWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -83,7 +89,7 @@ NeonResizeWorkload::NeonResizeWorkload(const ResizeQueueDescriptor& descriptor, void NeonResizeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonResizeWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonResizeWorkload_Execute", this->GetGuid()); m_ResizeLayer.run(); } diff --git a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp index 44980df996..13615f982c 100644 --- a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp +++ b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp @@ -36,7 +36,7 @@ NeonRsqrtWorkload::NeonRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, con void NeonRsqrtWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonRsqrtWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonRsqrtWorkload_Execute", this->GetGuid()); m_RsqrtLayer.run(); } diff --git a/src/backends/neon/workloads/NeonSinWorkload.cpp b/src/backends/neon/workloads/NeonSinWorkload.cpp index ac2bd4965a..4602a9f251 100644 --- 
a/src/backends/neon/workloads/NeonSinWorkload.cpp +++ b/src/backends/neon/workloads/NeonSinWorkload.cpp @@ -35,7 +35,7 @@ NeonSinWorkload::NeonSinWorkload(const ElementwiseUnaryQueueDescriptor& descript void NeonSinWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSinWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSinWorkload_Execute", this->GetGuid()); m_SinLayer.run(); } diff --git a/src/backends/neon/workloads/NeonSliceWorkload.cpp b/src/backends/neon/workloads/NeonSliceWorkload.cpp index 32cc042eab..86ae303d56 100644 --- a/src/backends/neon/workloads/NeonSliceWorkload.cpp +++ b/src/backends/neon/workloads/NeonSliceWorkload.cpp @@ -37,6 +37,13 @@ NeonSliceWorkload::NeonSliceWorkload(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSliceWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + + m_Data.ValidateInputsOutputs("NeonSliceWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); @@ -52,7 +59,7 @@ NeonSliceWorkload::NeonSliceWorkload(const SliceQueueDescriptor& descriptor, void NeonSliceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSliceWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSliceWorkload_Execute", this->GetGuid()); m_SliceFunction.run(); } diff --git a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp index 505844e24a..da20479d82 100644 --- a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp +++ b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp @@ -34,6 +34,12 @@ NeonSoftmaxWorkload::NeonSoftmaxWorkload(const SoftmaxQueueDescriptor& descripto const WorkloadInfo& info, std::shared_ptr& memoryManager) : BaseWorkload(descriptor, info) { + // Report Profiling Details + 
ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSoftmaxWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonSoftmaxWorkload", 1, 1); // The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions. @@ -48,7 +54,7 @@ NeonSoftmaxWorkload::NeonSoftmaxWorkload(const SoftmaxQueueDescriptor& descripto void NeonSoftmaxWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSoftmaxWorkload_Execute", this->GetGuid()); m_SoftmaxLayer->run(); } diff --git a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp index 42dd49cdc1..d7880e0f8d 100644 --- a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp +++ b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp @@ -41,10 +41,16 @@ arm_compute::Status NeonSpaceToBatchNdWorkloadValidate(const TensorInfo& input, &aclOutputInfo); } -NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& desc, +NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload(desc, info) + : BaseWorkload(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToBatchNdWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NESpaceToBatchNdWorkload", 1, 1); arm_compute::ITensor& input = @@ -79,7 +85,7 @@ void NeonSpaceToBatchNdWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToBatchNdWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp index 
43c991cfb2..b96b7d05ac 100644 --- a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp +++ b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp @@ -29,10 +29,16 @@ arm_compute::Status NeonSpaceToDepthWorkloadValidate(const TensorInfo& input, return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize); } -NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc, +NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info) + : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToDepthWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonSpaceToDepthWorkload", 1, 1); arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout); @@ -40,7 +46,7 @@ NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescri arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); input.info()->set_data_layout(aclDataLayout); - int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize); + int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize); arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor(); output.info()->set_data_layout(aclDataLayout); @@ -54,7 +60,7 @@ void NeonSpaceToDepthWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToDepthWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToDepthWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp index 4e428a2654..ea1def63d6 100644 --- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp +++ 
b/src/backends/neon/workloads/NeonSplitterWorkload.cpp @@ -56,6 +56,12 @@ arm_compute::Status NeonSplitterWorkloadValidate(const TensorInfo& input, NeonSplitterWorkload::NeonSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload<SplitterQueueDescriptor>(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSplitterWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + bool allOutputsAreSubtensors = true; // Check that all outputs are sub-tensors @@ -106,7 +112,7 @@ void NeonSplitterWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSplitterWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonStackWorkload.cpp b/src/backends/neon/workloads/NeonStackWorkload.cpp index 0b327b8a37..ad9bea1554 100644 --- a/src/backends/neon/workloads/NeonStackWorkload.cpp +++ b/src/backends/neon/workloads/NeonStackWorkload.cpp @@ -49,6 +49,12 @@ arm_compute::Status NeonStackWorkloadValidate(const std::vector<const TensorInfo*>& inputs, NeonStackWorkload::NeonStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) : BaseWorkload<StackQueueDescriptor>(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStackWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + std::vector<arm_compute::ITensor*> aclInputs; for (auto input : m_Data.m_Inputs) { @@ -67,7 +73,7 @@ void NeonStackWorkload::Execute() const { if (m_Layer) { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStackWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStackWorkload_Execute", this->GetGuid()); m_Layer->run(); } } diff --git a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp index d0aee07f9b..d9ec727e4b 100644 --- a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp +++ b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp @@ -50,6 +50,12 @@ NeonStridedSliceWorkload::NeonStridedSliceWorkload(const 
StridedSliceQueueDescri const WorkloadInfo& info) : BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStridedSliceWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs("NeonStridedSliceWorkload", 1, 1); arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); @@ -87,7 +93,7 @@ NeonStridedSliceWorkload::NeonStridedSliceWorkload(const StridedSliceQueueDescri void NeonStridedSliceWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStridedSliceWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStridedSliceWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp index 64f68aa6e2..68bf15435f 100644 --- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp +++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp @@ -57,7 +57,7 @@ NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescripto void NeonSubtractionWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSubtractionWorkload_Execute", this->GetGuid()); m_SubLayer->run(); } diff --git a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp index a1e545ce05..f9e1b36306 100644 --- a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp @@ -77,6 +77,23 @@ NeonTransposeConvolution2dWorkload::NeonTransposeConvolution2dWorkload( arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters); + // Add details for profiling output + WorkloadInfo detailsInfo; + + detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos; + 
detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos; + detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo()); + if (descriptor.m_Parameters.m_BiasEnabled) + { + detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo()); + } + + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeConvolution2dWorkload_Construct", + descriptor.m_Parameters, + detailsInfo, + this->GetGuid()); + m_Layer = std::make_unique<arm_compute::NEDeconvolutionLayer>(memoryManager); m_Layer->configure(&input, m_KernelTensor.get(), m_BiasTensor.get(), &output, padStrideInfo); @@ -95,7 +112,7 @@ NeonTransposeConvolution2dWorkload::NeonTransposeConvolution2dWorkload( void NeonTransposeConvolution2dWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonTransposeConvolution2dWorkload_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonTransposeConvolution2dWorkload_Execute", this->GetGuid()); m_Layer->run(); } diff --git a/src/backends/neon/workloads/NeonTransposeWorkload.cpp b/src/backends/neon/workloads/NeonTransposeWorkload.cpp index c11f2df2d2..2e4f358482 100644 --- a/src/backends/neon/workloads/NeonTransposeWorkload.cpp +++ b/src/backends/neon/workloads/NeonTransposeWorkload.cpp @@ -28,6 +28,12 @@ NeonTransposeWorkload::NeonTransposeWorkload(const TransposeQueueDescriptor& des const WorkloadInfo& info) : BaseWorkload<TransposeQueueDescriptor>(descriptor, info) { + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + m_Data.ValidateInputsOutputs(GetName(), 1, 1); const arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor(); @@ -41,7 +47,7 @@ NeonTransposeWorkload::NeonTransposeWorkload(const TransposeQueueDescriptor& des void NeonTransposeWorkload::Execute() const { - ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute"); + ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid()); 
m_PermuteFunction.run(); } -- cgit v1.2.1