author    Keith Davis <keith.davis@arm.com>  2021-08-05 11:35:00 +0100
committer KeithARM <keith.davis@arm.com>  2021-08-10 11:53:19 +0000
commit    2d0679f33f75c43e7169fe0f0ee2d15d0620e091 (patch)
tree      9f259a41e3208aa37a19330b63e0aabac607cacf
parent    d218d9804723e78da9bbd36e6211b3310426852b (diff)
download  armnn-2d0679f33f75c43e7169fe0f0ee2d15d0620e091.tar.gz

IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in Neon

Signed-off-by: Keith Davis <keith.davis@arm.com>
Change-Id: I7be77712a9f790928219ce91222d46cc766ab9dd
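For context, the commit applies the same two-part pattern to every workload in the Neon backend. A condensed sketch of that pattern follows (`NeonExampleWorkload` is an illustrative name, not a real class; the two macros are the ones used throughout the diff below): the constructor reports the descriptor parameters, tensor infos, and the workload's GUID once at construction, and `Execute()` swaps the plain `ARMNN_SCOPED_PROFILING_EVENT_NEON` macro for its `_GUID` variant so each runtime event can be correlated with the reported details.

```cpp
// Condensed sketch of the pattern this commit applies to each Neon workload.
// "NeonExampleWorkload" is illustrative; the macros are those used in the diff.

NeonExampleWorkload::NeonExampleWorkload(const ExampleQueueDescriptor& descriptor,
                                         const WorkloadInfo& info)
    : BaseWorkload<ExampleQueueDescriptor>(descriptor, info)
{
    // Report Profiling Details once, at construction time.
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonExampleWorkload_Construct",
                                         descriptor.m_Parameters,
                                         info,
                                         this->GetGuid());

    m_Data.ValidateInputsOutputs("NeonExampleWorkload", 1, 1);
    // ... existing ACL layer configuration is unchanged ...
}

void NeonExampleWorkload::Execute() const
{
    // The _GUID variant tags the scoped event with the workload's GUID so the
    // timeline event can be matched to the details reported above.
    ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonExampleWorkload_Execute", this->GetGuid());
    m_Layer.run();
}
```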
-rw-r--r--  src/backends/neon/workloads/NeonAbsWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonActivationWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonAdditionWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp  26
-rw-r--r--  src/backends/neon/workloads/NeonCastWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonComparisonWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonConcatWorkload.cpp  12
-rw-r--r--  src/backends/neon/workloads/NeonConstantWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonConvolution2dWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp  14
-rw-r--r--  src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp  41
-rw-r--r--  src/backends/neon/workloads/NeonDequantizeWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp  36
-rw-r--r--  src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp  2
-rw-r--r--  src/backends/neon/workloads/NeonDivisionWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonExpWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonFillWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonFloorFloatWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp  31
-rw-r--r--  src/backends/neon/workloads/NeonGatherWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonLogWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonLogicalAndWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonLogicalNotWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonLogicalOrWorkload.cpp  10
-rw-r--r--  src/backends/neon/workloads/NeonLstmFloatWorkload.cpp  7
-rw-r--r--  src/backends/neon/workloads/NeonMaximumWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonMeanWorkload.cpp  14
-rw-r--r--  src/backends/neon/workloads/NeonMeanWorkload.hpp  2
-rw-r--r--  src/backends/neon/workloads/NeonMinimumWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonMultiplicationWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonNegWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp  13
-rw-r--r--  src/backends/neon/workloads/NeonPadWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonPermuteWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonPooling2dWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonPreluWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonQLstmWorkload.cpp  7
-rw-r--r--  src/backends/neon/workloads/NeonQuantizeWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp  1
-rw-r--r--  src/backends/neon/workloads/NeonReduceWorkload.cpp  20
-rw-r--r--  src/backends/neon/workloads/NeonReduceWorkload.hpp  2
-rw-r--r--  src/backends/neon/workloads/NeonReshapeWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonResizeWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonRsqrtWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonSinWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonSliceWorkload.cpp  9
-rw-r--r--  src/backends/neon/workloads/NeonSoftmaxWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp  12
-rw-r--r--  src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp  14
-rw-r--r--  src/backends/neon/workloads/NeonSplitterWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonStackWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonStridedSliceWorkload.cpp  8
-rw-r--r--  src/backends/neon/workloads/NeonSubtractionWorkload.cpp  2
-rw-r--r--  src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp  19
-rw-r--r--  src/backends/neon/workloads/NeonTransposeWorkload.cpp  8
64 files changed, 384 insertions, 122 deletions
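Most constructors pass the incoming `info` straight through to the reporting macro, but workloads that own weights and biases (Convolution2d, DepthwiseConvolution, FullyConnected) first build an enriched `WorkloadInfo`. The following sketch is distilled from the DepthwiseConvolution and FullyConnected hunks below; the `detailsInfo` fields and `descriptor.m_Weight`/`m_Bias` accessors appear verbatim in the diff, while the workload name is again illustrative.

```cpp
// Enriched variant used by weighted layers (see the DepthwiseConvolution and
// FullyConnected hunks below); field names are taken verbatim from the diff.
WorkloadInfo detailsInfo;
detailsInfo.m_InputTensorInfos  = info.m_InputTensorInfos;
detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
if (descriptor.m_Parameters.m_BiasEnabled)
{
    detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
}

ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonExampleWorkload_Construct",
                                     descriptor.m_Parameters,
                                     detailsInfo,   // enriched info instead of the raw WorkloadInfo
                                     this->GetGuid());
```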
diff --git a/src/backends/neon/workloads/NeonAbsWorkload.cpp b/src/backends/neon/workloads/NeonAbsWorkload.cpp
index ea14ac3897..bd476be1ae 100644
--- a/src/backends/neon/workloads/NeonAbsWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAbsWorkload.cpp
@@ -35,7 +35,7 @@ NeonAbsWorkload::NeonAbsWorkload(const AbsQueueDescriptor& descriptor, const Wor
void NeonAbsWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAbsWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAbsWorkload_Execute", this->GetGuid());
m_AbsLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonActivationWorkload.cpp b/src/backends/neon/workloads/NeonActivationWorkload.cpp
index 4b2169a6ee..dd4c97d76b 100644
--- a/src/backends/neon/workloads/NeonActivationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonActivationWorkload.cpp
@@ -33,6 +33,12 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor&
const WorkloadInfo& info)
: BaseWorkload<ActivationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonActivationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonActivationWorkload", 1, 1);
const arm_compute::ActivationLayerInfo activationLayerInfo =
@@ -49,7 +55,7 @@ NeonActivationWorkload::NeonActivationWorkload(const ActivationQueueDescriptor&
void NeonActivationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonActivationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonActivationWorkload_Execute", this->GetGuid());
m_ActivationLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonAdditionWorkload.cpp b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
index 5891677c0d..dfbb992e05 100644
--- a/src/backends/neon/workloads/NeonAdditionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonAdditionWorkload.cpp
@@ -56,7 +56,7 @@ NeonAdditionWorkload::NeonAdditionWorkload(const AdditionQueueDescriptor& descri
void NeonAdditionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonAdditionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonAdditionWorkload_Execute", this->GetGuid());
m_AddLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
index cc85791ae6..7e9d2c7ec6 100644
--- a/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonArgMinMaxWorkload.cpp
@@ -56,6 +56,12 @@ NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& des
const WorkloadInfo& info)
: BaseWorkload<ArgMinMaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonArgMinMaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -79,7 +85,7 @@ NeonArgMinMaxWorkload::NeonArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& des
void NeonArgMinMaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonArgMinMaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonArgMinMaxWorkload_Execute", this->GetGuid());
m_ArgMinMaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
index 5da7cca83e..3d0a90bc7d 100644
--- a/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchNormalizationWorkload.cpp
@@ -60,6 +60,12 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<BatchNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonBatchNormalizationWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -107,7 +113,7 @@ NeonBatchNormalizationWorkload::NeonBatchNormalizationWorkload(
void NeonBatchNormalizationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonBatchNormalizationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchNormalizationWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
index 3d479ff80d..2a35475541 100644
--- a/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
+++ b/src/backends/neon/workloads/NeonBatchToSpaceNdWorkload.cpp
@@ -19,14 +19,14 @@ using namespace armcomputetensorutils;
arm_compute::Status NeonBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const BatchToSpaceNdDescriptor& desc)
+ const BatchToSpaceNdDescriptor& descriptor)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, desc.m_DataLayout);
- const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, desc.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
// ArmNN blockShape is [H, W] Cl asks for W, H
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_BlockShape[1]);
const arm_compute::Status aclStatus = arm_compute::NEBatchToSpaceLayer::validate(&aclInputInfo,
blockWidth,
@@ -35,10 +35,16 @@ arm_compute::Status NeonBatchToSpaceNdWorkloadValidate(const TensorInfo& input,
return aclStatus;
}
-NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& desc,
+NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<BatchToSpaceNdQueueDescriptor>(desc, info)
+ : BaseWorkload<BatchToSpaceNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchToSpaceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonBatchToSpaceNdWorkload", 1, 1);
arm_compute::ITensor& input =
@@ -51,8 +57,8 @@ NeonBatchToSpaceNdWorkload::NeonBatchToSpaceNdWorkload(const BatchToSpaceNdQueue
output.info()->set_data_layout(aclDataLayout);
// ArmNN blockShape is [H, W] Cl asks for W, H
- int32_t blockHeight = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[0]);
- int32_t blockWidth = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockShape[1]);
+ int32_t blockHeight = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[0]);
+ int32_t blockWidth = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockShape[1]);
m_Layer.reset(new arm_compute::NEBatchToSpaceLayer());
m_Layer->configure(&input, blockWidth, blockHeight, &output);
@@ -63,7 +69,7 @@ void NeonBatchToSpaceNdWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToBatchNdWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonCastWorkload.cpp b/src/backends/neon/workloads/NeonCastWorkload.cpp
index 4727fe127e..50e212e1bc 100644
--- a/src/backends/neon/workloads/NeonCastWorkload.cpp
+++ b/src/backends/neon/workloads/NeonCastWorkload.cpp
@@ -37,7 +37,7 @@ NeonCastWorkload::NeonCastWorkload(const CastQueueDescriptor& descriptor, const
void NeonCastWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonCastWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonCastWorkload_Execute", this->GetGuid());
m_CastLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonComparisonWorkload.cpp b/src/backends/neon/workloads/NeonComparisonWorkload.cpp
index 01a6a0c78b..129921abe2 100644
--- a/src/backends/neon/workloads/NeonComparisonWorkload.cpp
+++ b/src/backends/neon/workloads/NeonComparisonWorkload.cpp
@@ -34,6 +34,12 @@ arm_compute::Status NeonComparisonWorkloadValidate(const TensorInfo& input0,
NeonComparisonWorkload::NeonComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ComparisonQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonComparisonWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonComparisonWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -47,7 +53,7 @@ NeonComparisonWorkload::NeonComparisonWorkload(const ComparisonQueueDescriptor&
void NeonComparisonWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonComparisonWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonComparisonWorkload_Execute", this->GetGuid());
m_ComparisonLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonConcatWorkload.cpp b/src/backends/neon/workloads/NeonConcatWorkload.cpp
index 5cd906da41..657a9402ef 100644
--- a/src/backends/neon/workloads/NeonConcatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConcatWorkload.cpp
@@ -18,9 +18,9 @@ using namespace armcomputetensorutils;
namespace
{
-size_t CalcAxis(const armnn::OriginsDescriptor& desc)
+size_t CalcAxis(const armnn::OriginsDescriptor& descriptor)
{
- return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+ return (descriptor.GetNumDimensions() - descriptor.GetConcatAxis()) - 1;
}
} //namespace
@@ -50,6 +50,12 @@ NeonConcatWorkload::NeonConcatWorkload(
const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ConcatQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConcatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
bool allInputsAreSubtensors = true;
// Check that all inputs are sub-tensors
@@ -93,7 +99,7 @@ void NeonConcatWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConcatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConcatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonConstantWorkload.cpp b/src/backends/neon/workloads/NeonConstantWorkload.cpp
index 77e4420794..16bb211816 100644
--- a/src/backends/neon/workloads/NeonConstantWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConstantWorkload.cpp
@@ -53,7 +53,7 @@ NeonConstantWorkload::NeonConstantWorkload(const ConstantQueueDescriptor& descri
void NeonConstantWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConstantWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConstantWorkload_Execute", this->GetGuid());
using namespace armcomputetensorutils;
diff --git a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
index 79d1f22313..e8cc1254e5 100644
--- a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp
@@ -24,7 +24,7 @@ NeonConvertBf16ToFp32Workload::NeonConvertBf16ToFp32Workload(const ConvertBf16To
void NeonConvertBf16ToFp32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertBf16ToFp32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertBf16ToFp32Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
index 01f09a56f6..0d6bb047f9 100644
--- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp
@@ -24,7 +24,7 @@ NeonConvertFp16ToFp32Workload::NeonConvertFp16ToFp32Workload(const ConvertFp16To
void NeonConvertFp16ToFp32Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp16ToFp32Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp16ToFp32Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
index e1aceec197..84d3c78b49 100644
--- a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp
@@ -25,7 +25,7 @@ NeonConvertFp32ToBf16Workload::NeonConvertFp32ToBf16Workload(const ConvertFp32To
void NeonConvertFp32ToBf16Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToBf16Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToBf16Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
index 62f39be467..7f6d4d6215 100644
--- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
+++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp
@@ -25,7 +25,7 @@ NeonConvertFp32ToFp16Workload::NeonConvertFp32ToFp16Workload(const ConvertFp32To
void NeonConvertFp32ToFp16Workload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonConvertFp32ToFp16Workload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonConvertFp32ToFp16Workload_Execute", this->GetGuid());
auto convertFunc = [](uint8_t* dst, const uint8_t* src, size_t size)
{
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index a6ae99b481..0b0a72cb46 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -131,7 +131,7 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
}
// Report Profiling Details
- ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Execute",
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonConvolution2dWorkload_Construct",
descriptor.m_Parameters,
detailsInfo,
this->GetGuid());
diff --git a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
index 2c4a6517e7..76829f376c 100644
--- a/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthToSpaceWorkload.cpp
@@ -29,10 +29,16 @@ arm_compute::Status NeonDepthToSpaceWorkloadValidate(const TensorInfo& input,
return arm_compute::NEDepthToSpaceLayer::validate(&aclInput, &aclOutput, blockSize);
}
-NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& desc,
+NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<DepthToSpaceQueueDescriptor>(desc, info)
+ : BaseWorkload<DepthToSpaceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthToSpaceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonDepthToSpaceWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -41,7 +47,7 @@ NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescri
PolymorphicPointerDowncast<IAclTensorHandle>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ITensor& output =
PolymorphicPointerDowncast<IAclTensorHandle>(m_Data.m_Outputs[0])->GetTensor();
@@ -53,7 +59,7 @@ NeonDepthToSpaceWorkload::NeonDepthToSpaceWorkload(const DepthToSpaceQueueDescri
void NeonDepthToSpaceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthToSpaceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthToSpaceWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
index 589a951825..138c237aba 100644
--- a/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDepthwiseConvolutionWorkload.cpp
@@ -33,20 +33,20 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
const Optional<TensorInfo>& biases,
const ActivationDescriptor* activationDescriptor)
{
- const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
+ const arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(input, descriptor.m_DataLayout);
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output, descriptor.m_DataLayout);
// ArmNN's weight format is usually [ M, I, H, W ] but for depthwise its [ 1, H, W, I*M]
// Permute to [ 1, I * M, H, W ] (if NCHW), as required by the compute library
unsigned int aclDepthMultiplier;
TensorInfo weightsPermuted;
- std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input,descriptor.m_DataLayout);
+ std::tie(weightsPermuted, aclDepthMultiplier) = Convert1HWOTensorInfoToAcl(weights, input, descriptor.m_DataLayout);
// Convert the weights into the compute library format
const arm_compute::TensorInfo aclWeightsInfo = BuildArmComputeTensorInfo(weightsPermuted, descriptor.m_DataLayout);
arm_compute::TensorInfo aclBiasesInfo;
- arm_compute::TensorInfo *optionalAclBiasesInfo = nullptr;
+ arm_compute::TensorInfo* optionalAclBiasesInfo = nullptr;
if (descriptor.m_BiasEnabled)
{
@@ -58,10 +58,10 @@ arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo& i
arm_compute::PadStrideInfo aclPadStrideInfo = BuildArmComputePadStrideInfo(descriptor);
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
- descriptor.m_DilationX,descriptor.m_DilationY);
+ descriptor.m_DilationX, descriptor.m_DilationY);
const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
- activationDescriptor);
+ activationDescriptor);
return arm_compute::NEDepthwiseConvolutionLayer::validate(&aclInputInfo,
&aclWeightsInfo,
@@ -85,9 +85,9 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
unsigned int depthMultiplier;
std::unique_ptr<unsigned char[]> permuteBuffer(new unsigned char[weightInfo.GetNumBytes()]);
std::tie(weightsPermuted, depthMultiplier) = Convert1HWOTensorToAcl(m_Data.m_Weight,
- info.m_InputTensorInfos[0],
- m_Data.m_Parameters.m_DataLayout,
- permuteBuffer.get());
+ info.m_InputTensorInfos[0],
+ m_Data.m_Parameters.m_DataLayout,
+ permuteBuffer.get());
// Convert the weights into the compute library format
m_KernelTensor = std::make_unique<arm_compute::Tensor>();
@@ -100,14 +100,14 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
}
const arm_compute::Size2D aclDilationInfo = BuildArmComputeSize2D(
- m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
+ m_Data.m_Parameters.m_DilationX, m_Data.m_Parameters.m_DilationY);
m_Data.ValidateInputsOutputs("NeonDepthwiseConvolutionWorkload", 1, 1);
- IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
+ IAclTensorHandle* inputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0]);
IAclTensorHandle* outputTensorHandle = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
- arm_compute::ITensor& input = inputTensorHandle->GetTensor();
+ arm_compute::ITensor& input = inputTensorHandle->GetTensor();
arm_compute::ITensor& output = outputTensorHandle->GetTensor();
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -129,6 +129,23 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
activationInfo,
aclDilationInfo);
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDepthwiseConvolution2dWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
ScopedTensorHandle weightsPermutedHandle(weightsPermuted);
@@ -145,7 +162,7 @@ NeonDepthwiseConvolutionWorkload::NeonDepthwiseConvolutionWorkload(
void NeonDepthwiseConvolutionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDepthwiseConvolutionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDepthwiseConvolutionWorkload_Execute", this->GetGuid());
ARMNN_ASSERT(m_pDepthwiseConvolutionLayer);
m_pDepthwiseConvolutionLayer->run();
diff --git a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
index 07323d19ca..32c1134c35 100644
--- a/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDequantizeWorkload.cpp
@@ -44,7 +44,7 @@ NeonDequantizeWorkload::NeonDequantizeWorkload(const DequantizeQueueDescriptor&
void NeonDequantizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDequantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDequantizeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
index 36f1cd98de..a9cb5c40be 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.cpp
@@ -14,19 +14,19 @@
namespace armnn
{
-arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& desc)
+arm_compute::DetectionPostProcessLayerInfo MakeInfo(const DetectionPostProcessDescriptor& descriptor)
{
- return arm_compute::DetectionPostProcessLayerInfo(desc.m_MaxDetections,
- desc.m_MaxClassesPerDetection,
- desc.m_NmsScoreThreshold,
- desc.m_NmsIouThreshold,
- desc.m_NumClasses,
- { desc.m_ScaleX,
- desc.m_ScaleY,
- desc.m_ScaleW,
- desc.m_ScaleH },
- desc.m_UseRegularNms,
- desc.m_DetectionsPerClass);
+ return arm_compute::DetectionPostProcessLayerInfo(descriptor.m_MaxDetections,
+ descriptor.m_MaxClassesPerDetection,
+ descriptor.m_NmsScoreThreshold,
+ descriptor.m_NmsIouThreshold,
+ descriptor.m_NumClasses,
+ { descriptor.m_ScaleX,
+ descriptor.m_ScaleY,
+ descriptor.m_ScaleW,
+ descriptor.m_ScaleH },
+ descriptor.m_UseRegularNms,
+ descriptor.m_DetectionsPerClass);
}
arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodings,
@@ -36,9 +36,9 @@ arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodi
const TensorInfo& detectionClasses,
const TensorInfo& detectionScores,
const TensorInfo& numDetections,
- const DetectionPostProcessDescriptor &desc)
+ const DetectionPostProcessDescriptor &descriptor)
{
- arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(desc);
+ arm_compute::DetectionPostProcessLayerInfo info = MakeInfo(descriptor);
const arm_compute::TensorInfo aclBoxEncodings =
armcomputetensorutils::BuildArmComputeTensorInfo(boxEncodings);
@@ -77,6 +77,12 @@ NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload(
const WorkloadInfo& info)
: BaseWorkload<DetectionPostProcessQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonDetectionPostProcessWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Anchors = std::make_unique<arm_compute::Tensor>();
BuildArmComputeTensor(*m_Anchors, descriptor.m_Anchors->GetTensorInfo());
@@ -104,7 +110,7 @@ NeonDetectionPostProcessWorkload::NeonDetectionPostProcessWorkload(
void NeonDetectionPostProcessWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDetectionPostProcessWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDetectionPostProcessWorkload_Execute", this->GetGuid());
m_Func.run();
}
diff --git a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
index 29876ff795..82ef1e2f19 100644
--- a/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
+++ b/src/backends/neon/workloads/NeonDetectionPostProcessWorkload.hpp
@@ -20,7 +20,7 @@ arm_compute::Status NeonDetectionPostProcessValidate(const TensorInfo& boxEncodi
const TensorInfo& detectionClasses,
const TensorInfo& detectionScores,
const TensorInfo& numDetections,
- const DetectionPostProcessDescriptor &desc);
+ const DetectionPostProcessDescriptor &descriptor);
class NeonDetectionPostProcessWorkload : public BaseWorkload<DetectionPostProcessQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonDivisionWorkload.cpp b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
index fa61a100a9..8c5d2b80f6 100644
--- a/src/backends/neon/workloads/NeonDivisionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonDivisionWorkload.cpp
@@ -50,7 +50,7 @@ NeonDivisionWorkload::NeonDivisionWorkload(const DivisionQueueDescriptor& descri
void NeonDivisionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonDivisionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonDivisionWorkload_Execute", this->GetGuid());
m_DivLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonExpWorkload.cpp b/src/backends/neon/workloads/NeonExpWorkload.cpp
index 7baaa84547..aff8e72a4e 100644
--- a/src/backends/neon/workloads/NeonExpWorkload.cpp
+++ b/src/backends/neon/workloads/NeonExpWorkload.cpp
@@ -25,6 +25,12 @@ arm_compute::Status NeonExpWorkloadValidate(const TensorInfo& input, const Tenso
NeonExpWorkload::NeonExpWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonExpWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonExpWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -35,7 +41,7 @@ NeonExpWorkload::NeonExpWorkload(const ElementwiseUnaryQueueDescriptor& descript
void NeonExpWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonExpWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonExpWorkload_Execute", this->GetGuid());
m_ExpLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonFillWorkload.cpp b/src/backends/neon/workloads/NeonFillWorkload.cpp
index 5965d2000c..0a3c7f0c88 100644
--- a/src/backends/neon/workloads/NeonFillWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFillWorkload.cpp
@@ -19,6 +19,12 @@ using namespace armcomputetensorutils;
NeonFillWorkload::NeonFillWorkload(const FillQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<FillQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFillWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonFillWorkload", 1, 1);
arm_compute::ITensor& output = static_cast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
@@ -31,7 +37,7 @@ NeonFillWorkload::NeonFillWorkload(const FillQueueDescriptor& descriptor, const
void NeonFillWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFillWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFillWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
index c49df33a54..d728e00ea6 100644
--- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp
@@ -29,7 +29,7 @@ NeonFloorFloatWorkload::NeonFloorFloatWorkload(const FloorQueueDescriptor& descr
void NeonFloorFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFloorFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFloorFloatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
} //namespace armnn
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 713771be91..94dc07704d 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -19,6 +19,7 @@
namespace armnn
{
using namespace armcomputetensorutils;
+using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>;
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
@@ -32,10 +33,10 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
arm_compute::TensorInfo aclBiases;
- arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
- aclBiases = BuildArmComputeTensorInfo(biases);
+ aclBiases = BuildArmComputeTensorInfo(biases);
optionalAclBiases = &aclBiases;
}
@@ -50,7 +51,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
}
NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ ACLMemManagerOnDemand& memoryManager)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
@@ -69,8 +71,8 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
@@ -98,6 +100,23 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
}
}
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFullyConnectedWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
// Force Compute Library to perform the necessary copying and reshaping, after which
// delete all the input tensors that will no longer be needed
m_FullyConnectedLayer->prepare();
@@ -106,7 +125,7 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
void NeonFullyConnectedWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFullyConnectedWorkload_Execute", this->GetGuid());
m_FullyConnectedLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonGatherWorkload.cpp b/src/backends/neon/workloads/NeonGatherWorkload.cpp
index 2c94cb5184..f5c8d34235 100644
--- a/src/backends/neon/workloads/NeonGatherWorkload.cpp
+++ b/src/backends/neon/workloads/NeonGatherWorkload.cpp
@@ -28,6 +28,12 @@ NeonGatherWorkload::NeonGatherWorkload(const GatherQueueDescriptor& descriptor,
const WorkloadInfo& info)
: BaseWorkload<GatherQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonGatherWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonGatherWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@ NeonGatherWorkload::NeonGatherWorkload(const GatherQueueDescriptor& descriptor,
void NeonGatherWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonGatherWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonGatherWorkload_Execute", this->GetGuid());
m_Layer.run();
}
} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
index 1bfd1e4d47..a68ea65602 100644
--- a/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonInstanceNormalizationWorkload.cpp
@@ -35,6 +35,12 @@ NeonInstanceNormalizationWorkload::NeonInstanceNormalizationWorkload(
const WorkloadInfo& info)
: BaseWorkload<InstanceNormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonInstanceNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonInstanceNormalizationWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -53,7 +59,7 @@ NeonInstanceNormalizationWorkload::NeonInstanceNormalizationWorkload(
void NeonInstanceNormalizationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonInstanceNormalizationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonInstanceNormalizationWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
index d54607d31e..33b460918c 100644
--- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp
@@ -32,6 +32,12 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: FloatWorkload<L2NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonL2NormalizationFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonL2NormalizationFloatWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -50,7 +56,7 @@ NeonL2NormalizationFloatWorkload::NeonL2NormalizationFloatWorkload(const L2Norma
void NeonL2NormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonL2NormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonL2NormalizationFloatWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
index ba5c9000f4..8a9743298b 100644
--- a/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogSoftmaxWorkload.cpp
@@ -35,6 +35,12 @@ NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor&
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: BaseWorkload<LogSoftmaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogSoftmaxWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -48,7 +54,7 @@ NeonLogSoftmaxWorkload::NeonLogSoftmaxWorkload(const LogSoftmaxQueueDescriptor&
void NeonLogSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogSoftmaxWorkload_Execute", this->GetGuid());
m_LogSoftmaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonLogWorkload.cpp b/src/backends/neon/workloads/NeonLogWorkload.cpp
index 460f5b3b09..0fb8f8aa62 100644
--- a/src/backends/neon/workloads/NeonLogWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogWorkload.cpp
@@ -35,7 +35,7 @@ NeonLogWorkload::NeonLogWorkload(const ElementwiseUnaryQueueDescriptor& descript
void NeonLogWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogWorkload_Execute", this->GetGuid());
m_LogLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
index d85e05cfe8..179e495292 100644
--- a/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalAndWorkload.cpp
@@ -33,6 +33,12 @@ NeonLogicalAndWorkload::NeonLogicalAndWorkload(const LogicalBinaryQueueDescripto
const WorkloadInfo& info)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalAndWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalAndWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@ NeonLogicalAndWorkload::NeonLogicalAndWorkload(const LogicalBinaryQueueDescripto
void NeonLogicalAndWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalAndWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalAndWorkload_Execute", this->GetGuid());
m_LogicalAndLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
index cff5eaf2ba..16bf4e855d 100644
--- a/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalNotWorkload.cpp
@@ -31,6 +31,12 @@ NeonLogicalNotWorkload::NeonLogicalNotWorkload(const ElementwiseUnaryQueueDescri
const WorkloadInfo& info)
: BaseWorkload<ElementwiseUnaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalNotWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalNotWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@ NeonLogicalNotWorkload::NeonLogicalNotWorkload(const ElementwiseUnaryQueueDescri
void NeonLogicalNotWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalNotWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalNotWorkload_Execute", this->GetGuid());
m_LogicalNotLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
index c3f21e149d..301f432673 100644
--- a/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLogicalOrWorkload.cpp
@@ -30,9 +30,15 @@ arm_compute::Status NeonLogicalOrWorkloadValidate(const TensorInfo& input0,
}
NeonLogicalOrWorkload::NeonLogicalOrWorkload(const LogicalBinaryQueueDescriptor& descriptor,
- const WorkloadInfo& info)
+ const WorkloadInfo& info)
: BaseWorkload<LogicalBinaryQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLogicalOrWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonLogicalOrWorkload", 2, 1);
arm_compute::ITensor& input0 = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -44,7 +50,7 @@ NeonLogicalOrWorkload::NeonLogicalOrWorkload(const LogicalBinaryQueueDescriptor&
void NeonLogicalOrWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonLogicalOrWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLogicalOrWorkload_Execute", this->GetGuid());
m_LogicalOrLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
index 175e908817..f80da03ba1 100644
--- a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp
@@ -19,6 +19,12 @@ using namespace armcomputetensorutils;
NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
: FloatWorkload<LstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonLstmFloatWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ITensor> lstm_param;
// Basic parameters
@@ -267,6 +273,7 @@ NeonLstmFloatWorkload::NeonLstmFloatWorkload(const LstmQueueDescriptor &descript
void NeonLstmFloatWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonLstmFloatWorkload_Execute", this->GetGuid());
m_LstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMaximumWorkload.cpp b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
index c4500d885a..0f95af5f98 100644
--- a/src/backends/neon/workloads/NeonMaximumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMaximumWorkload.cpp
@@ -39,7 +39,7 @@ NeonMaximumWorkload::NeonMaximumWorkload(const MaximumQueueDescriptor& descripto
void NeonMaximumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMaximumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMaximumWorkload_Execute", this->GetGuid());
m_MaxLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.cpp b/src/backends/neon/workloads/NeonMeanWorkload.cpp
index bb0870d9ef..5d8d1c43a1 100644
--- a/src/backends/neon/workloads/NeonMeanWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMeanWorkload.cpp
@@ -17,21 +17,27 @@ using namespace armcomputetensorutils;
arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc)
+ const MeanDescriptor& descriptor)
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_Axis);
+ descriptor.m_Axis);
- return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, desc.m_KeepDims, &aclOutputInfo);
+ return arm_compute::NEReduceMean::validate(&aclInputInfo, coords, descriptor.m_KeepDims, &aclOutputInfo);
}
NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<MeanQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonMeanWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonMeanWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -46,7 +52,7 @@ NeonMeanWorkload::NeonMeanWorkload(const MeanQueueDescriptor& descriptor, const
void NeonMeanWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMeanWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMeanWorkload_Execute", this->GetGuid());
m_Layer.run();
}
diff --git a/src/backends/neon/workloads/NeonMeanWorkload.hpp b/src/backends/neon/workloads/NeonMeanWorkload.hpp
index 055b52a011..5d16588da2 100644
--- a/src/backends/neon/workloads/NeonMeanWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMeanWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
arm_compute::Status NeonMeanWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const MeanDescriptor& desc);
+ const MeanDescriptor& descriptor);
class NeonMeanWorkload : public BaseWorkload<MeanQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonMinimumWorkload.cpp b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
index 519b3c4bc6..5212947022 100644
--- a/src/backends/neon/workloads/NeonMinimumWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMinimumWorkload.cpp
@@ -40,7 +40,7 @@ NeonMinimumWorkload::NeonMinimumWorkload(const MinimumQueueDescriptor& descripto
void NeonMinimumWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMinimumWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMinimumWorkload_Execute", this->GetGuid());
m_MinLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
index e4ed195922..0ec550861f 100644
--- a/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMultiplicationWorkload.cpp
@@ -77,7 +77,7 @@ NeonMultiplicationWorkload::NeonMultiplicationWorkload(const MultiplicationQueue
void NeonMultiplicationWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMultiplicationWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonMultiplicationWorkload_Execute", this->GetGuid());
m_PixelWiseMultiplication->run();
}
diff --git a/src/backends/neon/workloads/NeonNegWorkload.cpp b/src/backends/neon/workloads/NeonNegWorkload.cpp
index 06c146754c..e7705e64b4 100644
--- a/src/backends/neon/workloads/NeonNegWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNegWorkload.cpp
@@ -35,7 +35,7 @@ NeonNegWorkload::NeonNegWorkload(const ElementwiseUnaryQueueDescriptor& descript
void NeonNegWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNegWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNegWorkload_Execute", this->GetGuid());
m_NegLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
index 77fc429b95..92d499737e 100644
--- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
+++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp
@@ -19,6 +19,7 @@ namespace armnn
namespace
{
+using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>;
bool IsNeonNormalizationDescriptorSupported(const NormalizationDescriptor& parameters,
Optional<std::string&> reasonIfUnsupported)
@@ -58,10 +59,16 @@ arm_compute::Status NeonNormalizationWorkloadValidate(const TensorInfo& input,
}
NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor,
- const WorkloadInfo& info,
- std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ ACLMemManagerOnDemand& memoryManager)
: FloatWorkload<NormalizationQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonNormalizationWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonNormalizationFloatWorkload", 1, 1);
std::string reasonIfUnsupported;
if (!IsNeonNormalizationDescriptorSupported(m_Data.m_Parameters, Optional<std::string&>(reasonIfUnsupported)))
@@ -99,7 +106,7 @@ NeonNormalizationFloatWorkload::NeonNormalizationFloatWorkload(const Normalizati
void NeonNormalizationFloatWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonNormalizationFloatWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonNormalizationFloatWorkload_Execute", this->GetGuid());
m_NormalizationLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonPadWorkload.cpp b/src/backends/neon/workloads/NeonPadWorkload.cpp
index 19cdefc8ac..b378d5f843 100644
--- a/src/backends/neon/workloads/NeonPadWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPadWorkload.cpp
@@ -19,6 +19,12 @@ using namespace armcomputetensorutils;
NeonPadWorkload::NeonPadWorkload(const PadQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<PadQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPadWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonPadWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@ NeonPadWorkload::NeonPadWorkload(const PadQueueDescriptor& descriptor, const Wor
void NeonPadWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPadWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPadWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonPermuteWorkload.cpp b/src/backends/neon/workloads/NeonPermuteWorkload.cpp
index a5ecbcb2c0..9e18f7f032 100644
--- a/src/backends/neon/workloads/NeonPermuteWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPermuteWorkload.cpp
@@ -28,6 +28,12 @@ NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descripto
const WorkloadInfo& info)
: BaseWorkload<PermuteQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPermuteWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
using armcomputetensorutils::BuildArmComputePermutationVector;
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
@@ -42,7 +48,7 @@ NeonPermuteWorkload::NeonPermuteWorkload(const PermuteQueueDescriptor& descripto
void NeonPermuteWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid());
m_PermuteFunction.run();
}
diff --git a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
index 968d5ce02d..2115e93872 100644
--- a/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPooling2dWorkload.cpp
@@ -37,6 +37,12 @@ NeonPooling2dWorkload::NeonPooling2dWorkload(
const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<Pooling2dQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonPooling2dWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonPooling2dWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -55,7 +61,7 @@ NeonPooling2dWorkload::NeonPooling2dWorkload(
void NeonPooling2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPooling2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPooling2dWorkload_Execute", this->GetGuid());
m_PoolingLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonPreluWorkload.cpp b/src/backends/neon/workloads/NeonPreluWorkload.cpp
index 8e6ea301de..af03e7960d 100644
--- a/src/backends/neon/workloads/NeonPreluWorkload.cpp
+++ b/src/backends/neon/workloads/NeonPreluWorkload.cpp
@@ -45,7 +45,7 @@ NeonPreluWorkload::NeonPreluWorkload(const PreluQueueDescriptor& descriptor,
void NeonPreluWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonPreluWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonPreluWorkload_Execute", this->GetGuid());
m_PreluLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonQLstmWorkload.cpp b/src/backends/neon/workloads/NeonQLstmWorkload.cpp
index fd979d6533..c25262afa4 100644
--- a/src/backends/neon/workloads/NeonQLstmWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQLstmWorkload.cpp
@@ -17,6 +17,12 @@ using namespace armcomputetensorutils;
NeonQLstmWorkload::NeonQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<QLstmQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonQLstmWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
arm_compute::LSTMParams<arm_compute::ITensor> qLstmParams;
// Mandatory params
@@ -230,6 +236,7 @@ NeonQLstmWorkload::NeonQLstmWorkload(const QLstmQueueDescriptor& descriptor, con
void NeonQLstmWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQLstmWorkload_Execute", this->GetGuid());
m_QLstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
index 14fbdf3dd9..f50ca81cc3 100644
--- a/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQuantizeWorkload.cpp
@@ -43,7 +43,7 @@ void NeonQuantizeWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonQuantizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
index d809017692..e36fde4d89 100644
--- a/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
+++ b/src/backends/neon/workloads/NeonQuantizedLstmWorkload.cpp
@@ -124,6 +124,7 @@ NeonQuantizedLstmWorkload::NeonQuantizedLstmWorkload(const QuantizedLstmQueueDes
void NeonQuantizedLstmWorkload::Execute() const
{
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonQuantizedLstmWorkload_Execute", this->GetGuid());
m_QuantizedLstmLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 1436cd1192..bf7ce9892e 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -18,28 +18,28 @@ using namespace armcomputetensorutils;
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc)
+ const ReduceDescriptor& descriptor)
{
- if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty())
+ if (descriptor.m_vAxis.size() == 1 || descriptor.m_vAxis.empty())
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
- desc.m_vAxis);
+ descriptor.m_vAxis);
return arm_compute::NEReductionOperation::validate(&aclInputInfo,
&aclOutputInfo,
static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+ ConvertReductionOperationToAcl(descriptor),
+ descriptor.m_KeepDims);
}
else
{
// Validate layer if there are multiple axes.
arm_compute::Status status;
- IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, desc, status);
+ IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, descriptor, status);
return status;
}
}
@@ -47,6 +47,12 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<ReduceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonReduceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonReduceWorkload", 1, 1);
arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -65,7 +71,7 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor,
void NeonReduceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReduceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReduceWorkload_Execute", this->GetGuid());
m_Layer.run();
}
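
Editor's note: NeonReduceWorkloadValidate above takes two paths — a reduction over zero or one axis is checked directly against arm_compute::NEReductionOperation::validate, while reductions over multiple axes are deferred to the IS_MULTI_AXES_REDUCE_SUPPORTED helper macro. A hedged usage sketch, assuming inputInfo and outputInfo are armnn::TensorInfo objects built elsewhere:

    ReduceDescriptor reduceDesc;
    reduceDesc.m_vAxis              = { 1 };                   // single axis: direct validation path
    reduceDesc.m_KeepDims           = true;
    reduceDesc.m_ReductionOperation = ReductionOperation::Sum;

    arm_compute::Status status = NeonReduceWorkloadValidate(inputInfo, outputInfo, reduceDesc);
    if (status.error_code() != arm_compute::ErrorCode::OK)
    {
        // Reduction not supported on Neon for these tensors/axes; see status.description().
    }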
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.hpp b/src/backends/neon/workloads/NeonReduceWorkload.hpp
index 0472091fbf..ddeac1267c 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.hpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.hpp
@@ -14,7 +14,7 @@ namespace armnn
arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
- const ReduceDescriptor& desc);
+ const ReduceDescriptor& descriptor);
class NeonReduceWorkload : public BaseWorkload<ReduceQueueDescriptor>
{
diff --git a/src/backends/neon/workloads/NeonReshapeWorkload.cpp b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
index 8b11da7253..7f2f225c23 100644
--- a/src/backends/neon/workloads/NeonReshapeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReshapeWorkload.cpp
@@ -39,7 +39,7 @@ NeonReshapeWorkload::NeonReshapeWorkload(const ReshapeQueueDescriptor& descripto
void NeonReshapeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonReshapeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonReshapeWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonResizeWorkload.cpp b/src/backends/neon/workloads/NeonResizeWorkload.cpp
index ab01e30140..ecb43ae740 100644
--- a/src/backends/neon/workloads/NeonResizeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonResizeWorkload.cpp
@@ -53,6 +53,12 @@ NeonResizeWorkload::NeonResizeWorkload(const ResizeQueueDescriptor& descriptor,
const WorkloadInfo& info)
: BaseWorkload<ResizeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonResizeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonResizeWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -83,7 +89,7 @@ NeonResizeWorkload::NeonResizeWorkload(const ResizeQueueDescriptor& descriptor,
void NeonResizeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonResizeWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonResizeWorkload_Execute", this->GetGuid());
m_ResizeLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
index 44980df996..13615f982c 100644
--- a/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
+++ b/src/backends/neon/workloads/NeonRsqrtWorkload.cpp
@@ -36,7 +36,7 @@ NeonRsqrtWorkload::NeonRsqrtWorkload(const RsqrtQueueDescriptor& descriptor, con
void NeonRsqrtWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonRsqrtWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonRsqrtWorkload_Execute", this->GetGuid());
m_RsqrtLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonSinWorkload.cpp b/src/backends/neon/workloads/NeonSinWorkload.cpp
index ac2bd4965a..4602a9f251 100644
--- a/src/backends/neon/workloads/NeonSinWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSinWorkload.cpp
@@ -35,7 +35,7 @@ NeonSinWorkload::NeonSinWorkload(const ElementwiseUnaryQueueDescriptor& descript
void NeonSinWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSinWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSinWorkload_Execute", this->GetGuid());
m_SinLayer.run();
}
diff --git a/src/backends/neon/workloads/NeonSliceWorkload.cpp b/src/backends/neon/workloads/NeonSliceWorkload.cpp
index 32cc042eab..86ae303d56 100644
--- a/src/backends/neon/workloads/NeonSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSliceWorkload.cpp
@@ -37,6 +37,12 @@ NeonSliceWorkload::NeonSliceWorkload(const SliceQueueDescriptor& descriptor,
const WorkloadInfo& info)
: BaseWorkload<SliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonSliceWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -52,7 +58,7 @@ NeonSliceWorkload::NeonSliceWorkload(const SliceQueueDescriptor& descriptor,
void NeonSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSliceWorkload_Execute", this->GetGuid());
m_SliceFunction.run();
}
diff --git a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
index 505844e24a..da20479d82 100644
--- a/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSoftmaxWorkload.cpp
@@ -34,6 +34,12 @@ NeonSoftmaxWorkload::NeonSoftmaxWorkload(const SoftmaxQueueDescriptor& descripto
const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
: BaseWorkload<SoftmaxQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSoftmaxWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonSoftmaxWorkload", 1, 1);
// The ArmCompute softmax layer uses 2D input/output tensors, so flatten the first three dimensions.
@@ -48,7 +54,7 @@ NeonSoftmaxWorkload::NeonSoftmaxWorkload(const SoftmaxQueueDescriptor& descripto
void NeonSoftmaxWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSoftmaxWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSoftmaxWorkload_Execute", this->GetGuid());
m_SoftmaxLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
index 42dd49cdc1..d7880e0f8d 100644
--- a/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToBatchNdWorkload.cpp
@@ -41,10 +41,16 @@ arm_compute::Status NeonSpaceToBatchNdWorkloadValidate(const TensorInfo& input,
&aclOutputInfo);
}
-NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& desc,
+NeonSpaceToBatchNdWorkload::NeonSpaceToBatchNdWorkload(const SpaceToBatchNdQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<SpaceToBatchNdQueueDescriptor>(desc, info)
+ : BaseWorkload<SpaceToBatchNdQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToBatchNdWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NESpaceToBatchNdWorkload", 1, 1);
arm_compute::ITensor& input =
@@ -79,7 +85,7 @@ void NeonSpaceToBatchNdWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToBatchNdWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToBatchNdWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
index 43c991cfb2..b96b7d05ac 100644
--- a/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSpaceToDepthWorkload.cpp
@@ -29,10 +29,16 @@ arm_compute::Status NeonSpaceToDepthWorkloadValidate(const TensorInfo& input,
return arm_compute::NESpaceToDepthLayer::validate(&aclInput, &aclOutput, blockSize);
}
-NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& desc,
+NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescriptor& descriptor,
const WorkloadInfo& info)
- : BaseWorkload<SpaceToDepthQueueDescriptor>(desc, info)
+ : BaseWorkload<SpaceToDepthQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSpaceToDepthWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonSpaceToDepthWorkload", 1, 1);
arm_compute::DataLayout aclDataLayout = ConvertDataLayout(m_Data.m_Parameters.m_DataLayout);
@@ -40,7 +46,7 @@ NeonSpaceToDepthWorkload::NeonSpaceToDepthWorkload(const SpaceToDepthQueueDescri
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
input.info()->set_data_layout(aclDataLayout);
- int32_t blockSize = armnn::numeric_cast<int32_t>(desc.m_Parameters.m_BlockSize);
+ int32_t blockSize = armnn::numeric_cast<int32_t>(descriptor.m_Parameters.m_BlockSize);
arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
output.info()->set_data_layout(aclDataLayout);
@@ -54,7 +60,7 @@ void NeonSpaceToDepthWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSpaceToDepthWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSpaceToDepthWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonSplitterWorkload.cpp b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
index 4e428a2654..ea1def63d6 100644
--- a/src/backends/neon/workloads/NeonSplitterWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSplitterWorkload.cpp
@@ -56,6 +56,12 @@ arm_compute::Status NeonSplitterWorkloadValidate(const TensorInfo& input,
NeonSplitterWorkload::NeonSplitterWorkload(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<SplitterQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonSplitterWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
bool allOutputsAreSubtensors = true;
// Check that all outputs are sub-tensors
@@ -106,7 +112,7 @@ void NeonSplitterWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSplitterWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSplitterWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonStackWorkload.cpp b/src/backends/neon/workloads/NeonStackWorkload.cpp
index 0b327b8a37..ad9bea1554 100644
--- a/src/backends/neon/workloads/NeonStackWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStackWorkload.cpp
@@ -49,6 +49,12 @@ arm_compute::Status NeonStackWorkloadValidate(const std::vector<const TensorInfo
NeonStackWorkload::NeonStackWorkload(const StackQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<StackQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStackWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
std::vector<arm_compute::ITensor*> aclInputs;
for (auto input : m_Data.m_Inputs)
{
@@ -67,7 +73,7 @@ void NeonStackWorkload::Execute() const
{
if (m_Layer)
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStackWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStackWorkload_Execute", this->GetGuid());
m_Layer->run();
}
}
diff --git a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
index d0aee07f9b..d9ec727e4b 100644
--- a/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonStridedSliceWorkload.cpp
@@ -50,6 +50,12 @@ NeonStridedSliceWorkload::NeonStridedSliceWorkload(const StridedSliceQueueDescri
const WorkloadInfo& info)
: BaseWorkload<StridedSliceQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonStridedSliceWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs("NeonStridedSliceWorkload", 1, 1);
arm_compute::ITensor& input = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -87,7 +93,7 @@ NeonStridedSliceWorkload::NeonStridedSliceWorkload(const StridedSliceQueueDescri
void NeonStridedSliceWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonStridedSliceWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonStridedSliceWorkload_Execute", this->GetGuid());
m_Layer->run();
}
diff --git a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
index 64f68aa6e2..68bf15435f 100644
--- a/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
+++ b/src/backends/neon/workloads/NeonSubtractionWorkload.cpp
@@ -57,7 +57,7 @@ NeonSubtractionWorkload::NeonSubtractionWorkload(const SubtractionQueueDescripto
void NeonSubtractionWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonSubtractionWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonSubtractionWorkload_Execute", this->GetGuid());
m_SubLayer->run();
}
diff --git a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
index a1e545ce05..f9e1b36306 100644
--- a/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeConvolution2dWorkload.cpp
@@ -77,6 +77,23 @@ NeonTransposeConvolution2dWorkload::NeonTransposeConvolution2dWorkload(
arm_compute::PadStrideInfo padStrideInfo = BuildArmComputePadStrideInfo(m_Data.m_Parameters);
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeConvolution2dWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
m_Layer = std::make_unique<arm_compute::NEDeconvolutionLayer>(memoryManager);
m_Layer->configure(&input, m_KernelTensor.get(), m_BiasTensor.get(), &output, padStrideInfo);
@@ -95,7 +112,7 @@ NeonTransposeConvolution2dWorkload::NeonTransposeConvolution2dWorkload(
void NeonTransposeConvolution2dWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonTransposeConvolution2dWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonTransposeConvolution2dWorkload_Execute", this->GetGuid());
m_Layer->run();
}
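
Editor's note: unlike the simpler workloads, which pass the incoming info straight to the macro, transpose convolution owns constant weight and bias tensors that the caller's WorkloadInfo does not describe, so the patch builds an augmented local copy first. An annotated restatement of that pattern (a sketch of the code added above, not a drop-in replacement):

    // info arrives as a const reference, so augment a local copy instead.
    WorkloadInfo detailsInfo;
    detailsInfo.m_InputTensorInfos  = info.m_InputTensorInfos;
    detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
    // Attach the constant-tensor shapes so the profiling record is complete.
    detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
    if (descriptor.m_Parameters.m_BiasEnabled)
    {
        detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
    }
    // Report the augmented copy rather than the raw info.
    ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeConvolution2dWorkload_Construct",
                                         descriptor.m_Parameters,
                                         detailsInfo,
                                         this->GetGuid());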
diff --git a/src/backends/neon/workloads/NeonTransposeWorkload.cpp b/src/backends/neon/workloads/NeonTransposeWorkload.cpp
index c11f2df2d2..2e4f358482 100644
--- a/src/backends/neon/workloads/NeonTransposeWorkload.cpp
+++ b/src/backends/neon/workloads/NeonTransposeWorkload.cpp
@@ -28,6 +28,12 @@ NeonTransposeWorkload::NeonTransposeWorkload(const TransposeQueueDescriptor& des
const WorkloadInfo& info)
: BaseWorkload<TransposeQueueDescriptor>(descriptor, info)
{
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonTransposeWorkload_Construct",
+ descriptor.m_Parameters,
+ info,
+ this->GetGuid());
+
m_Data.ValidateInputsOutputs(GetName(), 1, 1);
const arm_compute::ITensor& input = static_cast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
@@ -41,7 +47,7 @@ NeonTransposeWorkload::NeonTransposeWorkload(const TransposeQueueDescriptor& des
void NeonTransposeWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON(GetName() + "_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(GetName() + "_Execute", this->GetGuid());
m_PermuteFunction.run();
}