ArmNN
 23.05
NeonBatchMatMulWorkload.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
7 
8 #include "NeonWorkloadUtils.hpp"
9 
12 
14 
15 namespace armnn
16 {
18  const TensorInfo& inputInfoY,
19  const TensorInfo& outputInfo,
20  const BatchMatMulDescriptor& descriptor,
21  const bool isFastMathEnabled,
22  const ActivationDescriptor* activationDescriptor)
23 {
24  if (descriptor.m_AdjointX || descriptor.m_AdjointY )
25  {
26  throw Exception("Support for adjoint not implemented.");
27  }
29  {
30  throw Exception("Only supported the MatMul in the last 2 dimensions");
31  }
32 
33  arm_compute::TensorInfo aclInputInfoX = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoX);
34  arm_compute::TensorInfo aclInputInfoY = armcomputetensorutils::BuildArmComputeTensorInfo(inputInfoY);
35  arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(outputInfo);
36 
37  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
38  aclInputInfoX.set_are_values_constant(false);
39  aclInputInfoY.set_are_values_constant(false);
40 
41  const arm_compute::ActivationLayerInfo activationInfo = ConvertActivationDescriptorToAclActivationLayerInfo(
42  activationDescriptor);
43 
44  arm_compute::MatMulInfo matMulInfo;
45  matMulInfo.adj_lhs(descriptor.m_TransposeX);
46  matMulInfo.adj_rhs(descriptor.m_TransposeY);
47  matMulInfo.fused_activation(activationInfo);
48 
49  arm_compute::CpuMatMulSettings settings;
50  settings.fast_math(isFastMathEnabled);
51 
52  return arm_compute::NEMatMul::validate(&aclInputInfoX, &aclInputInfoY, &aclOutputInfo, matMulInfo, settings);
53 }
54 
56  const WorkloadInfo& info,
57  const bool isFastMathEnabled)
59 {
60  if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
61  {
62  throw Exception("Support for adjoint not implemented.");
63  }
66  {
67  throw Exception("Only supported the MatMul in the last 2 dimensions");
68  }
69 
70  m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
71 
72  arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
73  arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
74  arm_compute::ITensor& output = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0])->GetTensor();
75 
76  // GeMM dispatches kernel handles dynamic inputs differently to static so this flag needs to be set
77  inputX.info()->set_are_values_constant(false);
78  inputY.info()->set_are_values_constant(false);
79 
80  const arm_compute::ActivationLayerInfo activationInfo = ConvertAdditionalInfoToAclActivationLayerInfo(descriptor);
81 
82  arm_compute::MatMulInfo matMulInfo;
83  matMulInfo.adj_lhs(descriptor.m_Parameters.m_TransposeX);
84  matMulInfo.adj_rhs(descriptor.m_Parameters.m_TransposeY);
85  matMulInfo.fused_activation(activationInfo);
86 
87  arm_compute::CpuMatMulSettings settings;
88  settings.fast_math(isFastMathEnabled);
89 
90  m_MatMulLayer.configure(&inputX, &inputY, &output, matMulInfo, settings);
91 
92  // Report Profiling Details
93  WorkloadInfo detailsInfo;
94  detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
95  detailsInfo.m_OutputTensorInfos = info.m_OutputTensorInfos;
96  ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
97  descriptor.m_Parameters,
98  detailsInfo,
99  GetGuid());
100 }
101 
103 {
104  ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchMatMulWorkload_Execute", this->GetGuid());
105  m_MatMulLayer.run();
106 }
107 } //namespace armnn
armnn::BatchMatMulDescriptor::m_TransposeX
bool m_TransposeX
Transpose the slices of each input tensor. Transpose and Adjoint can not both be set to true for the same tensor at the same time.
Definition: Descriptors.hpp:1579
armnn::BaseWorkload< BatchMatMulQueueDescriptor >::GetGuid
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:61
armnn::QueueDescriptor::ValidateInputsOutputs
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
Definition: WorkloadData.cpp:472
armnn::ConvertActivationDescriptorToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertActivationDescriptorToAclActivationLayerInfo(const ActivationDescriptor &actDesc)
Definition: ArmComputeUtils.hpp:85
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::BatchMatMulQueueDescriptor
Definition: WorkloadData.hpp:748
armnn::NeonBatchMatMulWorkload::NeonBatchMatMulWorkload
NeonBatchMatMulWorkload(const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info, const bool isFastMathEnabled)
Definition: NeonBatchMatMulWorkload.cpp:55
armnn::ActivationDescriptor
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
armnn::BatchMatMulDescriptor
A BatchMatMulDescriptor for the BatchMatMul operator.
Definition: Descriptors.hpp:1551
PolymorphicDowncast.hpp
NeonBatchMatMulWorkload.hpp
armnn::BaseWorkload< BatchMatMulQueueDescriptor >::m_Data
BatchMatMulQueueDescriptor m_Data
Definition: Workload.hpp:83
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::NeonBatchMatMulWorkload::Execute
virtual void Execute() const override
Definition: NeonBatchMatMulWorkload.cpp:102
ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(name, guid)
Definition: NeonWorkloadUtils.hpp:24
armnn::NeonBatchMatMulValidate
arm_compute::Status NeonBatchMatMulValidate(const TensorInfo &inputInfoX, const TensorInfo &inputInfoY, const TensorInfo &outputInfo, const BatchMatMulDescriptor &descriptor, const bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
Definition: NeonBatchMatMulWorkload.cpp:17
armnn::DataLayout::NCHW
@ NCHW
armnn::WorkloadInfo::m_OutputTensorInfos
std::vector< TensorInfo > m_OutputTensorInfos
Definition: WorkloadInfo.hpp:19
armnn::BatchMatMulDescriptor::m_DataLayoutX
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout)
Definition: Descriptors.hpp:1588
armnn::BatchMatMulDescriptor::m_TransposeY
bool m_TransposeY
Definition: Descriptors.hpp:1580
ArmComputeUtils.hpp
armnn::TensorInfo
Definition: Tensor.hpp:152
NeonWorkloadUtils.hpp
armnn::Status
Status
Definition: Types.hpp:42
armnn::ConvertAdditionalInfoToAclActivationLayerInfo
arm_compute::ActivationLayerInfo ConvertAdditionalInfoToAclActivationLayerInfo(const QueueDescriptor &queueDescriptor)
Definition: ArmComputeUtils.hpp:103
armnn::WorkloadInfo
Contains information about TensorInfos of a layer.
Definition: WorkloadInfo.hpp:16
armnn::QueueDescriptorWithParameters::m_Parameters
LayerDescriptor m_Parameters
Definition: WorkloadData.hpp:66
ARMNN_REPORT_PROFILING_WORKLOAD_DESC
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
armnn::BatchMatMulDescriptor::m_DataLayoutY
DataLayout m_DataLayoutY
Definition: Descriptors.hpp:1589
armnn::QueueDescriptor::m_Outputs
std::vector< ITensorHandle * > m_Outputs
Definition: WorkloadData.hpp:27
WorkloadUtils.hpp
armnn::NeonBaseWorkload
Definition: NeonBaseWorkload.hpp:13
armnn::BatchMatMulDescriptor::m_AdjointY
bool m_AdjointY
Definition: Descriptors.hpp:1585
armnn::BatchMatMulDescriptor::m_AdjointX
bool m_AdjointX
Adjoint the slices of each input tensor. Transpose and Adjoint can not both be set to true for the same tensor at the same time.
Definition: Descriptors.hpp:1584
armnn::WorkloadInfo::m_InputTensorInfos
std::vector< TensorInfo > m_InputTensorInfos
Definition: WorkloadInfo.hpp:18
armnn::QueueDescriptor::m_Inputs
std::vector< ITensorHandle * > m_Inputs
Definition: WorkloadData.hpp:26
armnn::BoostLogSeverityMapping::info
@ info