104 : NeonBaseWorkload<BatchMatMulQueueDescriptor>(descriptor,
info)
106 if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY )
108 throw Exception(
"Support for adjoint not implemented.");
113 throw Exception(
"Only supported the MatMul in the last 2 dimensions");
118 descriptor.m_Parameters,
124 arm_compute::ITensor& inputX = PolymorphicDowncast<IAclTensorHandle*>(
m_Data.
m_Inputs[0])->GetTensor();
125 arm_compute::ITensor& inputY = PolymorphicDowncast<IAclTensorHandle*>(
m_Data.
m_Inputs[1])->GetTensor();
126 auto outputHandle = PolymorphicDowncast<IAclTensorHandle*>(
m_Data.
m_Outputs[0]);
127 arm_compute::ITensor& output = outputHandle->GetTensor();
132 inputX.info()->set_data_layout(aclDataLayoutX);
133 inputY.info()->set_data_layout(aclDataLayoutY);
135 if (descriptor.m_Parameters.m_TransposeX ==
true)
140 const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector);
142 auto permuteLayerX = std::make_unique<arm_compute::NEPermute>();
143 BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo);
144 InitialiseArmComputeTensorEmpty(m_PermutedTensorX);
145 permuteLayerX->configure(&inputX, &m_PermutedTensorX, aclPermutationXVector);
146 m_PermuteLayerX.reset(permuteLayerX.release());
149 if (descriptor.m_Parameters.m_TransposeY ==
true)
154 const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector);
156 auto permuteLayerY = std::make_unique<arm_compute::NEPermute>();
157 BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo);
158 InitialiseArmComputeTensorEmpty(m_PermutedTensorY);
159 permuteLayerY->configure(&inputY, &m_PermutedTensorY, aclPermutationYVector);
160 m_PermuteLayerY.reset(permuteLayerY.release());
163 const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(
false,
166 auto gemmLayer = std::make_unique<arm_compute::NEGEMM>();
167 gemmLayer->configure(descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX,
168 descriptor.m_Parameters.m_TransposeY ? &m_PermutedTensorY : &inputY,
174 m_GEMMLayer.reset(gemmLayer.release());
armnn::PermutationVector GeneratePermutationVectorOnLastTwoDimensions(unsigned int rank)
Generates a permutation vector of size rank that permutes the two rightmost dimensions.
void ValidateInputsOutputs(const std::string &descName, unsigned int numExpectedIn, unsigned int numExpectedOut) const
LayerDescriptor m_Parameters
DataLayout m_DataLayoutX
Data layout of each input tensor, such as NHWC/NDHWC (leave as default for arbitrary layout) ...
BatchMatMulQueueDescriptor m_Data
std::vector< ITensorHandle * > m_Outputs
#define ARMNN_REPORT_PROFILING_WORKLOAD_DESC(name, desc, infos, guid)
std::vector< ITensorHandle * > m_Inputs
armnn::TensorShape Permuted(const armnn::TensorShape &srcShape, const armnn::PermutationVector &mappings)