aboutsummaryrefslogtreecommitdiff
path: root/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl/workloads/ClFullyConnectedWorkload.cpp')
-rw-r--r--src/backends/cl/workloads/ClFullyConnectedWorkload.cpp30
1 files changed, 23 insertions, 7 deletions
diff --git a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
index d1d911ac13..a0889e1b60 100644
--- a/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
+++ b/src/backends/cl/workloads/ClFullyConnectedWorkload.cpp
@@ -28,10 +28,10 @@ arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo& input,
const arm_compute::TensorInfo aclWeights = BuildArmComputeTensorInfo(weights);
arm_compute::TensorInfo aclBiases;
- arm_compute::TensorInfo *optionalAclBiases = nullptr;
+ arm_compute::TensorInfo* optionalAclBiases = nullptr;
if (descriptor.m_BiasEnabled)
{
- aclBiases = BuildArmComputeTensorInfo(biases);
+ aclBiases = BuildArmComputeTensorInfo(biases);
optionalAclBiases = &aclBiases;
}
@@ -50,9 +50,25 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
const WorkloadInfo& info,
std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
const arm_compute::CLCompileContext& clCompileContext)
- : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info)
- , m_FullyConnectedLayer(memoryManager)
+ : BaseWorkload<FullyConnectedQueueDescriptor>(descriptor, info), m_FullyConnectedLayer(memoryManager)
{
+ // Add details for profiling output
+ WorkloadInfo detailsInfo;
+
+ detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
+ detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
+ detailsInfo.m_WeightsTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Weight->GetTensorInfo());
+ if (descriptor.m_Parameters.m_BiasEnabled)
+ {
+ detailsInfo.m_BiasTensorInfo = armnn::Optional<armnn::TensorInfo>(descriptor.m_Bias->GetTensorInfo());
+ }
+
+ // Report Profiling Details
+ ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClFullyConnectedWorkload_Construct",
+ descriptor.m_Parameters,
+ detailsInfo,
+ this->GetGuid());
+
m_WeightsTensor = std::make_unique<arm_compute::CLTensor>();
BuildArmComputeTensor(*m_WeightsTensor, m_Data.m_Weight->GetTensorInfo());
@@ -64,13 +80,13 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
m_Data.ValidateInputsOutputs("ClFullyConnectedWorkload", 1, 1);
- arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+ arm_compute::ICLTensor& input = static_cast<IClTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
arm_compute::ICLTensor& output = static_cast<IClTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
arm_compute::FullyConnectedLayerInfo fc_info =
- ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
+ ConvertFullyConnectedDescriptorToAclFullyConnectedLayerInfo(descriptor.m_Parameters, activationInfo);
m_FullyConnectedLayer.configure(clCompileContext,
&input,
@@ -94,7 +110,7 @@ ClFullyConnectedWorkload::ClFullyConnectedWorkload(
void ClFullyConnectedWorkload::Execute() const
{
- ARMNN_SCOPED_PROFILING_EVENT_CL("ClFullyConnectedWorkload_Execute");
+ ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClFullyConnectedWorkload_Execute", this->GetGuid());
RunClFunction(m_FullyConnectedLayer, CHECK_LOCATION());
}