aboutsummaryrefslogtreecommitdiff
path: root/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
diff options
context:
space:
mode:
authorKeith Davis <keith.davis@arm.com>2021-08-05 11:35:00 +0100
committerKeithARM <keith.davis@arm.com>2021-08-10 11:53:19 +0000
commit2d0679f33f75c43e7169fe0f0ee2d15d0620e091 (patch)
tree9f259a41e3208aa37a19330b63e0aabac607cacf /src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
parentd218d9804723e78da9bbd36e6211b3310426852b (diff)
downloadarmnn-2d0679f33f75c43e7169fe0f0ee2d15d0620e091.tar.gz
IVGCVSW-6249 Add ProfilingDetails Macros to all workloads in Neon
Signed-off-by: Keith Davis <keith.davis@arm.com> Change-Id: I7be77712a9f790928219ce91222d46cc766ab9dd
Diffstat (limited to 'src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp')
-rw-r--r--src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp31
1 files changed, 25 insertions, 6 deletions
diff --git a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
index 713771be91..94dc07704d 100644
--- a/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
+++ b/src/backends/neon/workloads/NeonFullyConnectedWorkload.cpp
@@ -19,6 +19,7 @@
namespace armnn
{
using namespace armcomputetensorutils;
+using ACLMemManagerOnDemand = std::shared_ptr<arm_compute::MemoryManagerOnDemand>;
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
@@ -32,10 +33,10 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
arm_compute::TensorInfo aclBiases;
- arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
- aclBiases = BuildArmComputeTensorInfo(biases);
+ aclBiases = BuildArmComputeTensorInfo(biases);
optionalAclBiases = &aclBiases;
}
@@ -50,7 +51,8 @@ arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo& input,
}
NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
- const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+ const WorkloadInfo& info,
+ ACLMemManagerOnDemand& memoryManager)
: BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
{
m_Data.ValidateInputsOutputs("NeonFullyConnectedWorkload", 1, 1);
@@ -69,8 +71,8 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
- arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ arm_compute::FullyConnectedLayerInfo fc_info =
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
auto layer = std::make_unique<arm_compute::NEFullyConnectedLayer>(memoryManager);
layer->configure(&input, m_WeightsTensor.get(), m_BiasesTensor.get(), &output, fc_info);
@@ -98,6 +100,23 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
}
}
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonFullyConnectedWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
// Force Compute Library to perform the necessary copying and reshaping, after which
// delete all the input tensors that will no longer be needed
m_FullyConnectedLayer->prepare();
@@ -106,7 +125,7 @@ NeonFullyConnectedWorkload::NeonFullyConnectedWorkload(const FullyConnectedQueue
void NeonFullyConnectedWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonFullyConnectedWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFullyConnectedWorkload_Execute", this->GetGuid());
m_FullyConnectedLayer->run();
}