From ae050524109f1ce827962665436ef7430f2ac479 Mon Sep 17 00:00:00 2001 From: David Monahan Date: Wed, 22 Mar 2023 16:48:58 +0000 Subject: IVGCVSW-7255 Update Doxygen Documentation and publish on GitHub. * Updating Doxygen documentation for 23.02 release. Signed-off-by: David Monahan Change-Id: I545574ff7664b4595d2fe6a91a3c35d2ad55df82 --- ...lassarmnn_1_1_neon_batch_mat_mul_workload.xhtml | 159 ++++++++++++++++----- 1 file changed, 125 insertions(+), 34 deletions(-) (limited to '23.02/classarmnn_1_1_neon_batch_mat_mul_workload.xhtml') diff --git a/23.02/classarmnn_1_1_neon_batch_mat_mul_workload.xhtml b/23.02/classarmnn_1_1_neon_batch_mat_mul_workload.xhtml index 6cfdcf0f66..efcf77e439 100644 --- a/23.02/classarmnn_1_1_neon_batch_mat_mul_workload.xhtml +++ b/23.02/classarmnn_1_1_neon_batch_mat_mul_workload.xhtml @@ -8,7 +8,7 @@ - + ArmNN: NeonBatchMatMulWorkload Class Reference @@ -19,9 +19,6 @@ - @@ -30,7 +27,8 @@ extensions: ["tex2jax.js"], jax: ["input/TeX","output/HTML-CSS"], }); - + + @@ -51,18 +49,21 @@ - + +/* @license-end */
@@ -76,7 +77,9 @@ $(function() {
@@ -111,13 +114,13 @@ Inheritance diagram for NeonBatchMatMulWorkload:
NeonBaseWorkload< BatchMatMulQueueDescriptor > BaseWorkload< BatchMatMulQueueDescriptor > -IWorkload - - +IWorkload + + - + @@ -144,6 +147,10 @@ Public Member Functions + + + + @@ -191,23 +198,81 @@ Additional Inherited Members

Definition at line 102 of file NeonBatchMatMulWorkload.cpp.

- -

References ARMNN_REPORT_PROFILING_WORKLOAD_DESC, BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BaseWorkload< BatchMatMulQueueDescriptor >::m_Data, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, QueueDescriptor::m_Inputs, QueueDescriptor::m_Outputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, armnn::NCHW, and QueueDescriptor::ValidateInputsOutputs().

-
104  : NeonBaseWorkload<BatchMatMulQueueDescriptor>(descriptor, info)
105 {
106  if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
107  {
108  throw Exception("Support for adjoint not implemented.");
109  }
110  if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW ||
111  descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW )
112  {
113  throw Exception("Only supported the MatMul in the last 2 dimensions");
114  }
115 
116  // Report Profiling Details
117  ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
118  descriptor.m_Parameters,
119  info,
120  this->GetGuid());
121 
122  m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
123 
124  arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
125  arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
126  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
127  arm_compute::ITensor& output = outputHandle->GetTensor();
128 
129  arm_compute::DataLayout aclDataLayoutX = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX);
130  arm_compute::DataLayout aclDataLayoutY = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY);
131 
132  inputX.info()->set_data_layout(aclDataLayoutX);
133  inputY.info()->set_data_layout(aclDataLayoutY);
134 
135  if (descriptor.m_Parameters.m_TransposeX == true)
136  {
137  armnn::PermutationVector permutationXVector
138  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions());
139  const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector);
140  const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
141 
142  auto permuteLayerX = std::make_unique<arm_compute::NEPermute>();
143  BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
144  InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
145  permuteLayerX->configure(&inputX, &m_PermutedTensorX, aclPermutationXVector);
146  m_PermuteLayerX.reset(permuteLayerX.release());
147  }
148 
149  if (descriptor.m_Parameters.m_TransposeY == true)
150  {
151  armnn::PermutationVector permutationYVector
152  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions());
153  const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector);
154  const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
155 
156  auto permuteLayerY = std::make_unique<arm_compute::NEPermute>();
157  BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
158  InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
159  permuteLayerY->configure(&inputY, &m_PermutedTensorY, aclPermutationYVector);
160  m_PermuteLayerY.reset(permuteLayerY.release());
161  }
162 
163  const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped
164  false, // is inputY reshaped
165  false); // is inputY reshaped only 1st run
166  auto gemmLayer = std::make_unique<arm_compute::NEGEMM>();
167  gemmLayer->configure(descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX,
168  descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY,
169  nullptr,
170  &output,
171  1.0,
172  0,
173  gemm_info);
174  m_GEMMLayer.reset(gemmLayer.release());
175 }
-
DataLayout
Definition: Types.hpp:62
-
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
Generates a permutation vector of size rank that permutes the 2 most right dimensions.
-
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
- -
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout) ...
- - -
std::vector< ITensorHandle * > m_Outputs
- -
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
-
std::vector< ITensorHandle * > m_Inputs
- -
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
+
104  : NeonBaseWorkload<BatchMatMulQueueDescriptor>(descriptor, info)
+
105 {
+
106  if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
+
107  {
+
108  throw Exception("Support for adjoint not implemented.");
+
109  }
+
110  if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW ||
+
111  descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW )
+
112  {
+
113  throw Exception("Only supported the MatMul in the last 2 dimensions");
+
114  }
+
115 
+
116  // Report Profiling Details
+
117  ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
+
118  descriptor.m_Parameters,
+
119  info,
+
120  this->GetGuid());
+
121 
+
122  m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
+
123 
+
124  arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
+
125  arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
+
126  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
+
127  arm_compute::ITensor& output = outputHandle->GetTensor();
+
128 
+
129  arm_compute::DataLayout aclDataLayoutX = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX);
+
130  arm_compute::DataLayout aclDataLayoutY = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY);
+
131 
+
132  inputX.info()->set_data_layout(aclDataLayoutX);
+
133  inputY.info()->set_data_layout(aclDataLayoutY);
+
134 
+
135  if (descriptor.m_Parameters.m_TransposeX == true)
+
136  {
+
137  armnn::PermutationVector permutationXVector
+
138  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions());
+
139  const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector);
+
140  const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
+
141 
+
142  auto permuteLayerX = std::make_unique<arm_compute::NEPermute>();
+
143  BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
+
144  InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
+
145  permuteLayerX->configure(&inputX, &m_PermutedTensorX, aclPermutationXVector);
+
146  m_PermuteLayerX.reset(permuteLayerX.release());
+
147  }
+
148 
+
149  if (descriptor.m_Parameters.m_TransposeY == true)
+
150  {
+
151  armnn::PermutationVector permutationYVector
+
152  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions());
+
153  const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector);
+
154  const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
+
155 
+
156  auto permuteLayerY = std::make_unique<arm_compute::NEPermute>();
+
157  BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
+
158  InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
+
159  permuteLayerY->configure(&inputY, &m_PermutedTensorY, aclPermutationYVector);
+
160  m_PermuteLayerY.reset(permuteLayerY.release());
+
161  }
+
162 
+
163  const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped
+
164  false, // is inputY reshaped
+
165  false); // is inputY reshaped only 1st run
+
166  auto gemmLayer = std::make_unique<arm_compute::NEGEMM>();
+
167  gemmLayer->configure(descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX,
+
168  descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY,
+
169  nullptr,
+
170  &output,
+
171  1.0,
+
172  0,
+
173  gemm_info);
+
174  m_GEMMLayer.reset(gemmLayer.release());
+
175 }
+

References ARMNN_REPORT_PROFILING_WORKLOAD_DESC, armnn::info, BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BaseWorkload< BatchMatMulQueueDescriptor >::m_Data, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, QueueDescriptor::m_Inputs, QueueDescriptor::m_Outputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, armnn::NCHW, and QueueDescriptor::ValidateInputsOutputs().

+

Member Function Documentation

@@ -237,11 +302,21 @@ Additional Inherited Members

Implements IWorkload.

Definition at line 177 of file NeonBatchMatMulWorkload.cpp.

- -

References ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID, and BaseWorkload< BatchMatMulQueueDescriptor >::GetGuid().

-
178 {
179  ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchMatMulWorkload_Execute", this->GetGuid());
180  if (m_PermuteLayerX)
181  {
182  m_PermuteLayerX->run();
183  }
184  if (m_PermuteLayerY)
185  {
186  m_PermuteLayerY->run();
187  }
188  m_GEMMLayer->run();
189 }
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:61
-
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(name, guid)
+
178 {
+
179  ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchMatMulWorkload_Execute", this->GetGuid());
+
180  if (m_PermuteLayerX)
+
181  {
+
182  m_PermuteLayerX->run();
+
183  }
+
184  if (m_PermuteLayerY)
+
185  {
+
186  m_PermuteLayerY->run();
+
187  }
+
188  m_GEMMLayer->run();
+
189 }
+

References ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID, and BaseWorkload< BatchMatMulQueueDescriptor >::GetGuid().

+

The documentation for this class was generated from the following files: +
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:61
+
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
+
DataLayout
Definition: Types.hpp:62
+ +
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98
+
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(name, guid)
+ +
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout)
+
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
Generates a permutation vector of size rank that permutes the 2 most right dimensions.
+ + +
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
+ +
std::vector< ITensorHandle * > m_Outputs
+
std::vector< ITensorHandle * > m_Inputs
+ -- cgit v1.2.1

Public Member Functions

 NeonBatchMatMulWorkload (const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
 NeonBatchMatMulWorkload (const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
 
virtual void Execute () const override
 
- Public Member Functions inherited from IWorkload
virtual ~IWorkload ()
 
virtual arm::pipe::ProfilingGuid GetGuid () const =0
 
virtual bool SupportsTensorHandleReplacement () const =0
 
virtual void RegisterDebugCallback (const DebugCallbackFunction &)
 
virtual armnn::Optional< armnn::MemoryRequirements > GetMemoryRequirements ()