152 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
153 return std::make_unique<RefActivationWorkload>(*activationQueueDescriptor, info);
157 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
161 return std::make_unique<RefAdditionWorkload<int32_t>>(*additionQueueDescriptor,
info);
165 return std::make_unique<RefAdditionWorkload<float>>(*additionQueueDescriptor,
info);
170 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
171 return std::make_unique<RefArgMinMaxWorkload>(*argMinMaxQueueDescriptor, info);
175 auto batchMatMulQueueDescriptor = PolymorphicDowncast<const BatchMatMulQueueDescriptor*>(&descriptor);
176 return std::make_unique<RefBatchMatMulWorkload>(*batchMatMulQueueDescriptor, info);
180 auto batchNormQueueDescriptor = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
181 return std::make_unique<RefBatchNormalizationWorkload>(*batchNormQueueDescriptor, info);
185 auto batchToSpaceNdQueueDescriptor
186 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
187 return std::make_unique<RefBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor, info);
191 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
192 return std::make_unique<RefCastWorkload>(*castQueueDescriptor, info);
196 auto channelShuffleQueueDescriptor
197 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
198 return std::make_unique<RefChannelShuffleWorkload>(*channelShuffleQueueDescriptor, info);
202 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
203 return std::make_unique<RefComparisonWorkload>(*comparisonQueueDescriptor, info);
207 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
208 return std::make_unique<RefConcatWorkload>(*concatQueueDescriptor, info);
212 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
213 return std::make_unique<RefConstantWorkload>(*constantQueueDescriptor, info);
217 auto convertFp16ToFp32QueueDescriptor
218 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
219 return std::make_unique<RefConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor, info);
223 auto convertFp32ToFp16QueueDescriptor
224 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
225 return std::make_unique<RefConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor, info);
229 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
230 return std::make_unique<RefConvolution2dWorkload>(*convolution2dQueueDescriptor, info);
234 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
235 return std::make_unique<RefConvolution3dWorkload>(*convolution3dQueueDescriptor, info);
239 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
242 return std::make_unique<RefDebugBFloat16Workload>(*debugQueueDescriptor, info);
246 return std::make_unique<RefDebugFloat16Workload>(*debugQueueDescriptor, info);
250 return std::make_unique<RefDebugQSymmS16Workload>(*debugQueueDescriptor, info);
254 return std::make_unique<RefDebugQSymmS8Workload>(*debugQueueDescriptor, info);
258 return std::make_unique<RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
262 return std::make_unique<RefDebugQAsymmS8Workload>(*debugQueueDescriptor, info);
266 return std::make_unique<RefDebugSigned32Workload>(*debugQueueDescriptor, info);
269 return MakeWorkload<RefDebugFloat32Workload, RefDebugQAsymmU8Workload>(*debugQueueDescriptor, info);
273 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
274 return std::make_unique<RefDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor, info);
278 auto depthwiseConvolution2DQueueDescriptor
279 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
280 return std::make_unique<RefDepthwiseConvolution2dWorkload>(*depthwiseConvolution2DQueueDescriptor, info);
284 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
285 return std::make_unique<RefDequantizeWorkload>(*dequantizeQueueDescriptor, info);
289 auto detectionPostProcessQueueDescriptor
290 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
291 return std::make_unique<RefDetectionPostProcessWorkload>(*detectionPostProcessQueueDescriptor, info);
295 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
298 return std::make_unique<RefDivisionWorkload<int32_t>>(*divisionQueueDescriptor,
info);
302 return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor,
info);
307 auto elementwiseBinaryQueueDescriptor
308 = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
309 return std::make_unique<RefElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor, info);
313 auto elementwiseUnaryQueueDescriptor
314 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
317 return std::make_unique<RefLogicalUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
319 return std::make_unique<RefElementwiseUnaryWorkload>(*elementwiseUnaryQueueDescriptor, info);
323 auto fakeQuantizationQueueDescriptor
324 = PolymorphicDowncast<const FakeQuantizationQueueDescriptor*>(&descriptor);
325 return std::make_unique<RefFakeQuantizationFloat32Workload>(*fakeQuantizationQueueDescriptor, info);
329 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
330 return std::make_unique<RefFillWorkload>(*fillQueueDescriptor, info);
334 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
341 return std::make_unique<RefFloorWorkload>(*floorQueueDescriptor, info);
346 auto fullyConnectedQueueDescriptor
347 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
348 return std::make_unique<RefFullyConnectedWorkload>(*fullyConnectedQueueDescriptor, info);
352 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
353 return std::make_unique<RefGatherWorkload>(*gatherQueueDescriptor, info);
357 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
358 return std::make_unique<RefGatherNdWorkload>(*gatherNdQueueDescriptor, info);
362 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
363 if (
info.m_InputTensorInfos.empty() )
365 throw InvalidArgumentException(
"RefWorkloadFactory::CreateInput: Input cannot be zero length");
367 if (
info.m_OutputTensorInfos.empty())
369 throw InvalidArgumentException(
"RefWorkloadFactory::CreateInput: Output cannot be zero length");
372 if (
info.m_InputTensorInfos[0].GetNumBytes() !=
info.m_OutputTensorInfos[0].GetNumBytes())
374 throw InvalidArgumentException(
"RefWorkloadFactory::CreateInput: "
375 "data input and output differ in byte count.");
378 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor, info);
382 auto instanceNormalizationQueueDescriptor
383 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
384 return std::make_unique<RefInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor, info);
388 auto l2NormalizationQueueDescriptor
389 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
390 return std::make_unique<RefL2NormalizationWorkload>(*l2NormalizationQueueDescriptor, info);
394 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
395 return std::make_unique<RefLogicalBinaryWorkload>(*logicalBinaryQueueDescriptor, info);
399 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
400 return std::make_unique<RefLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor, info);
404 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
405 return std::make_unique<RefLstmWorkload>(*lstmQueueDescriptor, info);
409 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
412 return std::make_unique<RefMaximumWorkload<int32_t>>(*maximumQueueDescriptor,
info);
416 return std::make_unique<RefMaximumWorkload<float>>(*maximumQueueDescriptor,
info);
421 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
422 return std::make_unique<RefMeanWorkload>(*meanQueueDescriptor, info);
426 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
427 if (descriptor.m_Inputs.empty())
429 throw InvalidArgumentException(
"RefWorkloadFactory: CreateMemCopy() expected an input tensor.");
431 return std::make_unique<CopyMemGenericWorkload>(*memCopyQueueDescriptor, info);
435 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
436 if (descriptor.m_Inputs.empty())
438 throw InvalidArgumentException(
"RefWorkloadFactory: CreateMemImport() expected an input tensor.");
440 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor, info);
444 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
447 return std::make_unique<RefMinimumWorkload<int32_t>>(*minimumQueueDescriptor,
info);
451 return std::make_unique<RefMinimumWorkload<float>>(*minimumQueueDescriptor,
info);
456 auto multiplicationQueueDescriptor
457 = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
460 return std::make_unique<RefMultiplicationWorkload<int32_t>>(*multiplicationQueueDescriptor,
info);
464 return std::make_unique<RefMultiplicationWorkload<float>>(*multiplicationQueueDescriptor,
info);
469 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
470 return std::make_unique<RefNormalizationWorkload>(*normalizationQueueDescriptor, info);
474 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
475 if (
info.m_InputTensorInfos.empty() )
477 throw InvalidArgumentException(
"RefWorkloadFactory::CreateOutput: Input cannot be zero length");
479 if (
info.m_OutputTensorInfos.empty())
481 throw InvalidArgumentException(
"RefWorkloadFactory::CreateOutput: Output cannot be zero length");
483 if (
info.m_InputTensorInfos[0].GetNumBytes() !=
info.m_OutputTensorInfos[0].GetNumBytes())
485 throw InvalidArgumentException(
"RefWorkloadFactory::CreateOutput: data input and output "
486 "differ in byte count.");
489 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor, info);
493 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
494 return std::make_unique<RefPadWorkload>(*padQueueDescriptor, info);
498 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
501 return std::make_unique<RefPermuteQSymm16Workload>(*permuteQueueDescriptor, info);
505 return std::make_unique<RefPermuteBFloat16Workload>(*permuteQueueDescriptor, info);
509 return std::make_unique<RefPermuteQAsymmS8Workload>(*permuteQueueDescriptor, info);
512 NullWorkload, NullWorkload, NullWorkload>(*permuteQueueDescriptor,
info);
516 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
517 return std::make_unique<RefPooling2dWorkload>(*pooling2dQueueDescriptor, info);
521 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
522 return std::make_unique<RefPooling3dWorkload>(*pooling3dQueueDescriptor, info);
530 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
531 return std::make_unique<RefPreluWorkload>(*preluQueueDescriptor, info);
535 auto qlstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
536 return std::make_unique<RefQLstmWorkload>(*qlstmQueueDescriptor, info);
540 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
541 return std::make_unique<RefQuantizeWorkload>(*quantizeQueueDescriptor, info);
545 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
546 return std::make_unique<RefRankWorkload>(*rankQueueDescriptor, info);
550 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
551 return std::make_unique<RefReduceWorkload>(*reduceQueueDescriptor, info);
555 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
556 return std::make_unique<RefReshapeWorkload>(*reshapeQueueDescriptor, info);
560 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
561 return std::make_unique<RefResizeWorkload>(*resizeQueueDescriptor, info);
565 auto shapeQueueDescriptor = PolymorphicDowncast<const ShapeQueueDescriptor*>(&descriptor);
566 return std::make_unique<RefShapeWorkload>(*shapeQueueDescriptor, info);
570 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
571 return std::make_unique<RefSliceWorkload>(*sliceQueueDescriptor, info);
575 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
576 return std::make_unique<RefSoftmaxWorkload>(*softmaxQueueDescriptor, info);
580 auto spaceToBatchNdQueueDescriptor
581 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
582 return std::make_unique<RefSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor, info);
586 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
587 return std::make_unique<RefSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor, info);
591 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
592 return std::make_unique<RefSplitterWorkload>(*splitterQueueDescriptor, info);
596 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
597 return std::make_unique<RefStackWorkload>(*stackQueueDescriptor, info);
601 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
602 return std::make_unique<RefStridedSliceWorkload>(*stridedSliceQueueDescriptor, info);
606 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
609 return std::make_unique<RefSubtractionWorkload<int32_t>>(*subtractionQueueDescriptor,
info);
613 return std::make_unique<RefSubtractionWorkload<float>>(*subtractionQueueDescriptor,
info);
618 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
621 return std::make_unique<RefTransposeQSymm16Workload>(*transposeQueueDescriptor, info);
625 return std::make_unique<RefTransposeBFloat16Workload>(*transposeQueueDescriptor, info);
629 return std::make_unique<RefTransposeQAsymmS8Workload>(*transposeQueueDescriptor, info);
633 (*transposeQueueDescriptor,
info);
637 auto transposeConvolution2dQueueDescriptor
638 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
639 return std::make_unique<RefTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor, info);
643 auto unidirectionalSequenceLstmQueueDescriptor
644 = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
645 return std::make_unique<RefUnidirectionalSequenceLstmWorkload>(*unidirectionalSequenceLstmQueueDescriptor,