ArmNN
 22.11
NeonBatchMatMulWorkload Class Reference

#include <NeonBatchMatMulWorkload.hpp>

Inheritance diagram for NeonBatchMatMulWorkload:
NeonBaseWorkload< BatchMatMulQueueDescriptor > BaseWorkload< BatchMatMulQueueDescriptor > IWorkload

Public Member Functions

 NeonBatchMatMulWorkload (const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
 
virtual void Execute () const override
 
- Public Member Functions inherited from NeonBaseWorkload< BatchMatMulQueueDescriptor >
 NeonBaseWorkload (const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
 
void ReplaceInputTensorHandle (ITensorHandle *tensorHandle, unsigned int slot) override
 
void ReplaceOutputTensorHandle (ITensorHandle *tensorHandle, unsigned int slot) override
 
- Public Member Functions inherited from BaseWorkload< BatchMatMulQueueDescriptor >
 BaseWorkload (const BatchMatMulQueueDescriptor &descriptor, const WorkloadInfo &info)
 
void ExecuteAsync (ExecutionData &executionData) override
 
void PostAllocationConfigure () override
 
const BatchMatMulQueueDescriptor & GetData () const
 
arm::pipe::ProfilingGuid GetGuid () const final
 
virtual bool SupportsTensorHandleReplacement () const override
 
- Public Member Functions inherited from IWorkload
virtual ~IWorkload ()
 
virtual void RegisterDebugCallback (const DebugCallbackFunction &)
 
virtual armnn::Optional< armnn::MemoryRequirements > GetMemoryRequirements ()
 

Additional Inherited Members

- Protected Member Functions inherited from NeonBaseWorkload< BatchMatMulQueueDescriptor >
virtual void Reconfigure ()
 
- Protected Attributes inherited from BaseWorkload< BatchMatMulQueueDescriptor >
BatchMatMulQueueDescriptor m_Data
 
const arm::pipe::ProfilingGuid m_Guid
 

Detailed Description

Definition at line 22 of file NeonBatchMatMulWorkload.hpp.

Constructor & Destructor Documentation

◆ NeonBatchMatMulWorkload()

NeonBatchMatMulWorkload ( const BatchMatMulQueueDescriptor & descriptor,
const WorkloadInfo & info 
)

Definition at line 102 of file NeonBatchMatMulWorkload.cpp.

References ARMNN_REPORT_PROFILING_WORKLOAD_DESC, BatchMatMulDescriptor::m_AdjointX, BatchMatMulDescriptor::m_AdjointY, BaseWorkload< BatchMatMulQueueDescriptor >::m_Data, BatchMatMulDescriptor::m_DataLayoutX, BatchMatMulDescriptor::m_DataLayoutY, QueueDescriptor::m_Inputs, QueueDescriptor::m_Outputs, QueueDescriptorWithParameters< LayerDescriptor >::m_Parameters, armnn::NCHW, and QueueDescriptor::ValidateInputsOutputs().

104  : NeonBaseWorkload<BatchMatMulQueueDescriptor>(descriptor, info)
105 {
106  if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
107  {
108  throw Exception("Support for adjoint not implemented.");
109  }
110  if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW ||
111  descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW )
112  {
113  throw Exception("Only supported the MatMul in the last 2 dimensions");
114  }
115 
116  // Report Profiling Details
117  ARMNN_REPORT_PROFILING_WORKLOAD_DESC("NeonBatchMatMulWorkload_Construct",
118  descriptor.m_Parameters,
119  info,
120  this->GetGuid());
121 
122  m_Data.ValidateInputsOutputs("NeonBatchMatMulWorkload", 2, 1);
123 
124  arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[0])->GetTensor();
125  arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Inputs[1])->GetTensor();
126  auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(m_Data.m_Outputs[0]);
127  arm_compute::ITensor& output = outputHandle->GetTensor();
128 
129  arm_compute::DataLayout aclDataLayoutX = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX);
130  arm_compute::DataLayout aclDataLayoutY = ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY);
131 
132  inputX.info()->set_data_layout(aclDataLayoutX);
133  inputY.info()->set_data_layout(aclDataLayoutY);
134 
135  if (descriptor.m_Parameters.m_TransposeX == true)
136  {
137  armnn::PermutationVector permutationXVector
138  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions());
139  const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector);
140  const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
141 
142  auto permuteLayerX = std::make_unique<arm_compute::NEPermute>();
143  BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
144  InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
145  permuteLayerX->configure(&inputX, &m_PermutedTensorX, aclPermutationXVector);
146  m_PermuteLayerX.reset(permuteLayerX.release());
147  }
148 
149  if (descriptor.m_Parameters.m_TransposeY == true)
150  {
151  armnn::PermutationVector permutationYVector
152  = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[1].GetNumDimensions());
153  const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[1], permutationYVector);
154  const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
155 
156  auto permuteLayerY = std::make_unique<arm_compute::NEPermute>();
157  BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
158  InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
159  permuteLayerY->configure(&inputY, &m_PermutedTensorY, aclPermutationYVector);
160  m_PermuteLayerY.reset(permuteLayerY.release());
161  }
162 
163  const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped
164  false, // is inputY reshaped
165  false); // is inputY reshaped only 1st run
166  auto gemmLayer = std::make_unique<arm_compute::NEGEMM>();
167  gemmLayer->configure(descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX,
168  descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY,
169  nullptr,
170  &output,
171  1.0,
172  0,
173  gemm_info);
174  m_GEMMLayer.reset(gemmLayer.release());
175 }
DataLayout
Definition: Types.hpp:62
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
Generates a permutation vector of size rank that permutes the 2 rightmost dimensions.
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout) ...
std::vector< ITensorHandle * > m_Outputs
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
Definition: Profiling.hpp:227
std::vector< ITensorHandle * > m_Inputs
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)
Definition: Permute.cpp:98

Member Function Documentation

◆ Execute()

void Execute ( ) const
overridevirtual

Implements IWorkload.

Definition at line 177 of file NeonBatchMatMulWorkload.cpp.

References ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID, and BaseWorkload< BatchMatMulQueueDescriptor >::GetGuid().

178 {
179  ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonBatchMatMulWorkload_Execute", this->GetGuid());
180  if (m_PermuteLayerX)
181  {
182  m_PermuteLayerX->run();
183  }
184  if (m_PermuteLayerY)
185  {
186  m_PermuteLayerY->run();
187  }
188  m_GEMMLayer->run();
189 }
arm::pipe::ProfilingGuid GetGuid() const final
Definition: Workload.hpp:61
#define ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID(name, guid)

The documentation for this class was generated from the following files: