// Templated const member RefWorkloadFactory::MakeWorkload: takes a queue descriptor
// of type QueueDescriptorType plus its WorkloadInfo and returns a
// std::unique_ptr<IWorkload> obtained from MakeWorkloadHelper. F32Workload and
// U8Workload fill the second and third template slots of the helper; the other
// four slots are NullWorkload.
// NOTE(review): the function braces and the helper's call arguments are not visible
// in this excerpt, and which data type each helper slot stands for is defined by
// MakeWorkloadHelper itself — confirm there.
24 template <
typename F32Workload,
typename U8Workload,
typename QueueDescriptorType>
25 std::unique_ptr<IWorkload> RefWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
26 const WorkloadInfo& info)
const
28 return MakeWorkloadHelper<NullWorkload, F32Workload, U8Workload, NullWorkload, NullWorkload, NullWorkload>
// Template parameterised on a DataType value (ArmnnType) that inspects the tensor
// infos held by `info`.
// NOTE(review): the signature line and the bodies of both `if` statements are not
// visible in this excerpt; presumably a match yields true — confirm.
32 template <DataType ArmnnType>
// Predicate: true when the given TensorInfo's data type equals ArmnnType.
35 auto checkType = [](
const TensorInfo& tensorInfo) {
return tensorInfo.GetDataType() == ArmnnType;};
// First look for a matching tensor among the inputs.
36 auto it = std::find_if(std::begin(
info.m_InputTensorInfos), std::end(
info.m_InputTensorInfos), checkType);
// `it` differs from end() only when some input tensor satisfied checkType.
37 if (it != std::end(
info.m_InputTensorInfos))
// Then reuse the same iterator variable to search the outputs.
41 it = std::find_if(std::begin(
info.m_OutputTensorInfos), std::end(
info.m_OutputTensorInfos), checkType);
// Same end() comparison, this time against the output container.
42 if (it != std::end(
info.m_OutputTensorInfos))
// Each statement below forwards `info` to IsDataType instantiated for one specific
// DataType value and returns that result unchanged.
// NOTE(review): the signatures of the functions these returns belong to are not
// visible in this excerpt.
// Signed32
50 return IsDataType<DataType::Signed32>(
info);
// BFloat16
54 return IsDataType<DataType::BFloat16>(
info);
// Float16
58 return IsDataType<DataType::Float16>(
info);
// QSymmS16
62 return IsDataType<DataType::QSymmS16>(
info);
// QSymmS8
66 return IsDataType<DataType::QSymmS8>(
info);
// QAsymmS8
70 return IsDataType<DataType::QAsymmS8>(
info);
// QAsymmU8
74 return IsDataType<DataType::QAsymmU8>(
info);
// Constructor member initializer: stores the `memoryManager` argument in
// m_MemoryManager. NOTE(review): the constructor signature and body are not
// visible in this excerpt.
78 : m_MemoryManager(memoryManager)
// Parameter fragments from two separate signatures. In both, outReasonIfUnsupported
// is a non-const std::string reference, so the callee is able to write into the
// caller's string. It is the last parameter in the first signature and is followed
// by further parameters in the second.
// NOTE(review): the rest of both signatures and their bodies are outside this
// excerpt.
94 std::string& outReasonIfUnsupported)
101 std::string& outReasonIfUnsupported,
// Tails of two const member functions that each take an `isMemoryManaged` flag and
// return a newly created RefTensorHandle. Each has two return paths: one passes
// m_MemoryManager to the handle alongside tensorInfo, the other constructs the
// handle from tensorInfo alone.
// NOTE(review): the branch choosing between the two paths is not visible here;
// presumably it tests isMemoryManaged — confirm.
108 const bool isMemoryManaged)
const
112 return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
116 return std::make_unique<RefTensorHandle>(tensorInfo);
// Second function tail with the same pair of return paths.
122 const bool isMemoryManaged)
const
130 return std::make_unique<RefTensorHandle>(tensorInfo, m_MemoryManager);
134 return std::make_unique<RefTensorHandle>(tensorInfo);
// Each group below narrows the generic `descriptor` to its concrete queue-descriptor
// type with PolymorphicDowncast, then builds the matching workload from that
// descriptor and `info` and returns it.
// NOTE(review): the enclosing function header and the control flow that selects a
// group are not visible in this excerpt.
146 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
147 return std::make_unique<RefActivationWorkload>(*activationQueueDescriptor,
info);
// Addition: an int32_t and a float instantiation are returned on separate paths;
// the condition choosing between them is not visible in this excerpt.
151 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
154 return std::make_unique<RefAdditionWorkload<int32_t>>(*additionQueueDescriptor,
info);
158 return std::make_unique<RefAdditionWorkload<float>>(*additionQueueDescriptor,
info);
163 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
164 return std::make_unique<RefArgMinMaxWorkload>(*argMinMaxQueueDescriptor,
info);
168 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
169 return std::make_unique<RefBatchMatMulWorkload>(*batchMatMulQueueDescriptor,
info);
173 auto batchNormQueueDescriptor = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
174 return std::make_unique<RefBatchNormalizationWorkload>(*batchNormQueueDescriptor,
info);
178 auto batchToSpaceNdQueueDescriptor
179 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
180 return std::make_unique<RefBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor,
info);
184 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
185 return std::make_unique<RefCastWorkload>(*castQueueDescriptor,
info);
189 auto channelShuffleQueueDescriptor
190 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
191 return std::make_unique<RefChannelShuffleWorkload>(*channelShuffleQueueDescriptor,
info);
195 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
196 return std::make_unique<RefComparisonWorkload>(*comparisonQueueDescriptor,
info);
200 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
201 return std::make_unique<RefConcatWorkload>(*concatQueueDescriptor,
info);
205 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
206 return std::make_unique<RefConstantWorkload>(*constantQueueDescriptor,
info);
210 auto convertFp16ToFp32QueueDescriptor
211 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
212 return std::make_unique<RefConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
info);
216 auto convertFp32ToFp16QueueDescriptor
217 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
218 return std::make_unique<RefConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
info);
222 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
223 return std::make_unique<RefConvolution2dWorkload>(*convolution2dQueueDescriptor,
info);
227 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
228 return std::make_unique<RefConvolution3dWorkload>(*convolution3dQueueDescriptor,
info);
// Debug: a dedicated workload type is returned per data type (BFloat16, Float16,
// QSymmS16, QSymmS8, QAsymmU8, QAsymmS8, Signed32). The last return instead goes
// through MakeWorkload with the Float32 and QAsymmU8 debug workloads.
// NOTE(review): the guards in front of each return are not visible here.
232 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
235 return std::make_unique<RefDebugBFloat16Workload>(*debugQueueDescriptor,
info);
239 return std::make_unique<RefDebugFloat16Workload>(*debugQueueDescriptor,
info);
243 return std::make_unique<RefDebugQSymmS16Workload>(*debugQueueDescriptor,
info);
247 return std::make_unique<RefDebugQSymmS8Workload>(*debugQueueDescriptor,
info);
251 return std::make_unique<RefDebugQAsymmU8Workload>(*debugQueueDescriptor,
info);
255 return std::make_unique<RefDebugQAsymmS8Workload>(*debugQueueDescriptor,
info);
259 return std::make_unique<RefDebugSigned32Workload>(*debugQueueDescriptor,
info);
261 return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(*debugQueueDescriptor,
info);
// Continuation of the per-layer construction groups: each one downcasts
// `descriptor` to the concrete queue-descriptor type and returns the matching
// workload built from it and `info`.
// NOTE(review): the control flow selecting each group is not visible in this
// excerpt.
265 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
266 return std::make_unique<RefDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor,
info);
270 auto depthwiseConvolution2DQueueDescriptor
271 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
272 return std::make_unique<RefDepthwiseConvolution2dWorkload>(*depthwiseConvolution2DQueueDescriptor,
info);
276 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
277 return std::make_unique<RefDequantizeWorkload>(*dequantizeQueueDescriptor,
info);
281 auto detectionPostProcessQueueDescriptor
282 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
283 return std::make_unique<RefDetectionPostProcessWorkload>(*detectionPostProcessQueueDescriptor,
info);
// Division: int32_t and float instantiations are returned on separate paths; the
// condition choosing between them is not visible in this excerpt.
287 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
290 return std::make_unique<RefDivisionWorkload<int32_t>>(*divisionQueueDescriptor,
info);
294 return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor,
info);
299 auto elementwiseBinaryQueueDescriptor
300 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
301 return std::make_unique<RefElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor,
info);
// ElementwiseUnary: the same descriptor can yield either a RefLogicalUnaryWorkload
// or a RefElementwiseUnaryWorkload. NOTE(review): the test that picks the logical
// variant is not visible here.
305 auto elementwiseUnaryQueueDescriptor
306 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
309 return std::make_unique<RefLogicalUnaryWorkload>(*elementwiseUnaryQueueDescriptor,
info);
311 return std::make_unique<RefElementwiseUnaryWorkload>(*elementwiseUnaryQueueDescriptor,
info);
315 auto fakeQuantizationQueueDescriptor
316 = PolymorphicDowncast<const FakeQuantizationQueueDescriptor*>(&descriptor);
317 return std::make_unique<RefFakeQuantizationFloat32Workload>(*fakeQuantizationQueueDescriptor,
info);
321 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
322 return std::make_unique<RefFillWorkload>(*fillQueueDescriptor,
info);
// Floor: NOTE(review): several original lines between the downcast and this return
// are missing from the excerpt, so any other return paths cannot be seen.
326 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
333 return std::make_unique<RefFloorWorkload>(*floorQueueDescriptor,
info);
338 auto fullyConnectedQueueDescriptor
339 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
340 return std::make_unique<RefFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
info);
344 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
345 return std::make_unique<RefGatherWorkload>(*gatherQueueDescriptor,
info);
349 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
350 return std::make_unique<RefGatherNdWorkload>(*gatherNdQueueDescriptor,
info);
// Input layer: downcast the descriptor, run three checks on `info`, then return a
// CopyMemGenericWorkload built from the descriptor and `info`.
// NOTE(review): what happens when a check fails is not visible in this excerpt;
// the message fragment below suggests an error is reported — confirm.
354 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
// Check 1: no input tensor infos were supplied.
355 if (
info.m_InputTensorInfos.empty() )
// Check 2: no output tensor infos were supplied.
359 if (
info.m_OutputTensorInfos.empty())
// Check 3: the first input and first output tensors differ in byte size.
363 if (
info.m_InputTensorInfos[0].GetNumBytes() !=
info.m_OutputTensorInfos[0].GetNumBytes())
// Tail of the message associated with the byte-count check.
366 "data input and output differ in byte count.");
368 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor,
info);
// Continuation of the per-layer construction groups: each one downcasts
// `descriptor` to the concrete queue-descriptor type and returns the matching
// workload built from it and `info`.
// NOTE(review): the control flow selecting each group is not visible in this
// excerpt.
372 auto instanceNormalizationQueueDescriptor
373 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
374 return std::make_unique<RefInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
info);
378 auto l2NormalizationQueueDescriptor
379 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
380 return std::make_unique<RefL2NormalizationWorkload>(*l2NormalizationQueueDescriptor,
info);
384 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
385 return std::make_unique<RefLogicalBinaryWorkload>(*logicalBinaryQueueDescriptor,
info);
389 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
390 return std::make_unique<RefLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
info);
394 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
395 return std::make_unique<RefLstmWorkload>(*lstmQueueDescriptor,
info);
// Maximum: int32_t and float instantiations are returned on separate paths; the
// condition choosing between them is not visible in this excerpt.
399 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
402 return std::make_unique<RefMaximumWorkload<int32_t>>(*maximumQueueDescriptor,
info);
406 return std::make_unique<RefMaximumWorkload<float>>(*maximumQueueDescriptor,
info);
411 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
412 return std::make_unique<RefMeanWorkload>(*meanQueueDescriptor,
info);
// MemCopy yields the generic copy workload. NOTE(review): original lines between
// the downcast and the return are missing from this excerpt.
416 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
421 return std::make_unique<CopyMemGenericWorkload>(*memCopyQueueDescriptor,
info);
// MemImport yields the generic import workload. NOTE(review): original lines
// between the downcast and the return are missing from this excerpt.
425 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
430 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor,
info);
// Minimum: int32_t and float instantiations on separate paths (condition not
// visible).
434 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
437 return std::make_unique<RefMinimumWorkload<int32_t>>(*minimumQueueDescriptor,
info);
441 return std::make_unique<RefMinimumWorkload<float>>(*minimumQueueDescriptor,
info);
// Multiplication: int32_t and float instantiations on separate paths (condition
// not visible).
446 auto multiplicationQueueDescriptor
447 = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
450 return std::make_unique<RefMultiplicationWorkload<int32_t>>(*multiplicationQueueDescriptor,
info);
454 return std::make_unique<RefMultiplicationWorkload<float>>(*multiplicationQueueDescriptor,
info);
459 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
460 return std::make_unique<RefNormalizationWorkload>(*normalizationQueueDescriptor,
info);
// Output layer: downcast the descriptor, run three checks on `info`, then return a
// CopyMemGenericWorkload built from the descriptor and `info`.
// NOTE(review): what happens when a check fails is not visible in this excerpt;
// the message fragment below suggests an error is reported — confirm.
464 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
// Check 1: no input tensor infos were supplied.
465 if (
info.m_InputTensorInfos.empty() )
// Check 2: no output tensor infos were supplied.
469 if (
info.m_OutputTensorInfos.empty())
// Check 3: the first input and first output tensors differ in byte size.
473 if (
info.m_InputTensorInfos[0].GetNumBytes() !=
info.m_OutputTensorInfos[0].GetNumBytes())
// Tail of the message associated with the byte-count check.
476 "differ in byte count.");
478 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor,
info);
// Continuation of the per-layer construction groups: each one downcasts
// `descriptor` to the concrete queue-descriptor type and returns the matching
// workload built from it and `info`.
// NOTE(review): the control flow selecting each group is not visible in this
// excerpt.
482 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
483 return std::make_unique<RefPadWorkload>(*padQueueDescriptor,
info);
// Permute: three type-specific workloads are visible (QSymm16, BFloat16, QAsymmS8).
// NOTE(review): the guards, and any further return after the last one shown, are
// missing from this excerpt.
487 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
490 return std::make_unique<RefPermuteQSymm16Workload>(*permuteQueueDescriptor,
info);
494 return std::make_unique<RefPermuteBFloat16Workload>(*permuteQueueDescriptor,
info);
498 return std::make_unique<RefPermuteQAsymmS8Workload>(*permuteQueueDescriptor,
info);
505 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
506 return std::make_unique<RefPooling2dWorkload>(*pooling2dQueueDescriptor,
info);
510 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
511 return std::make_unique<RefPooling3dWorkload>(*pooling3dQueueDescriptor,
info);
519 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
520 return std::make_unique<RefPreluWorkload>(*preluQueueDescriptor,
info);
524 auto qlstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
525 return std::make_unique<RefQLstmWorkload>(*qlstmQueueDescriptor,
info);
529 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
530 return std::make_unique<RefQuantizeWorkload>(*quantizeQueueDescriptor,
info);
534 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
535 return std::make_unique<RefRankWorkload>(*rankQueueDescriptor,
info);
539 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
540 return std::make_unique<RefReduceWorkload>(*reduceQueueDescriptor,
info);
544 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
545 return std::make_unique<RefReshapeWorkload>(*reshapeQueueDescriptor,
info);
549 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
550 return std::make_unique<RefResizeWorkload>(*resizeQueueDescriptor,
info);
554 auto reverseV2QueueDescriptor = PolymorphicDowncast<const ReverseV2QueueDescriptor*>(&descriptor);
555 return std::make_unique<RefReverseV2Workload>(*reverseV2QueueDescriptor,
info);
559 auto shapeQueueDescriptor = PolymorphicDowncast<const ShapeQueueDescriptor*>(&descriptor);
560 return std::make_unique<RefShapeWorkload>(*shapeQueueDescriptor,
info);
564 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
565 return std::make_unique<RefSliceWorkload>(*sliceQueueDescriptor,
info);
569 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
570 return std::make_unique<RefSoftmaxWorkload>(*softmaxQueueDescriptor,
info);
574 auto spaceToBatchNdQueueDescriptor
575 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
576 return std::make_unique<RefSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor,
info);
580 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
581 return std::make_unique<RefSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor,
info);
585 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
586 return std::make_unique<RefSplitterWorkload>(*splitterQueueDescriptor,
info);
590 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
591 return std::make_unique<RefStackWorkload>(*stackQueueDescriptor,
info);
595 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
596 return std::make_unique<RefStridedSliceWorkload>(*stridedSliceQueueDescriptor,
info);
// Subtraction: int32_t and float instantiations are returned on separate paths; the
// condition choosing between them is not visible in this excerpt.
600 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
603 return std::make_unique<RefSubtractionWorkload<int32_t>>(*subtractionQueueDescriptor,
info);
607 return std::make_unique<RefSubtractionWorkload<float>>(*subtractionQueueDescriptor,
info);
612 auto tileQueueDescriptor = PolymorphicDowncast<const TileQueueDescriptor*>(&descriptor);
613 return std::make_unique<RefTileWorkload>(*tileQueueDescriptor,
info);
// Transpose: three type-specific workloads are visible (QSymm16, BFloat16,
// QAsymmS8), followed by the argument list of one more call.
// NOTE(review): the guards and the callee of that final call are missing from this
// excerpt.
617 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
620 return std::make_unique<RefTransposeQSymm16Workload>(*transposeQueueDescriptor,
info);
624 return std::make_unique<RefTransposeBFloat16Workload>(*transposeQueueDescriptor,
info);
628 return std::make_unique<RefTransposeQAsymmS8Workload>(*transposeQueueDescriptor,
info);
632 (*transposeQueueDescriptor,
info);
636 auto transposeConvolution2dQueueDescriptor
637 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
638 return std::make_unique<RefTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
info);
642 auto unidirectionalSequenceLstmQueueDescriptor
643 = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
644 return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(*unidirectionalSequenceLstmQueueDescriptor,