29 #include <arm_compute/core/CL/CLKernelLibrary.h> 30 #include <arm_compute/runtime/CL/CLBufferAllocator.h> 31 #include <arm_compute/runtime/CL/CLScheduler.h> 48 std::string& outReasonIfUnsupported)
55 std::string& outReasonIfUnsupported,
71 if (modelOptions->SaveCachedNetwork())
75 auto cachedFd = modelOptions->GetCachedFileDescriptor();
78 std::vector<uint8_t> compiledContextData;
79 std::stringstream stream;
83 std::string
const serializedString{stream.str()};
84 std::copy(serializedString.begin(),
85 serializedString.end(),
86 std::back_inserter(compiledContextData));
87 auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size());
90 ARMNN_LOG(
info) <<
"ClWorkloadFactory:: Could not cache the compiled context!";
96 auto filePath = modelOptions->GetCachedNetworkFilePath();
97 if (filePath !=
"" && fs::exists(filePath) && fs::is_regular_file(filePath))
100 std::ofstream file(filePath, std::ios::out | std::ios::binary);
108 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
114 return MakeWorkloadHelper<FloatWorkload, Uint8Workload>(descriptor,
info, std::forward<Args>(args)...);
122 template <
typename Workload,
typename QueueDescriptorType,
typename... Args>
123 std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(
const QueueDescriptorType& descriptor,
129 return std::make_unique<Workload>(descriptor,
info, std::forward<Args>(args)...);
137 void ClWorkloadFactory::InitializeCLCompileContext()
140 auto context = arm_compute::CLKernelLibrary::get().context();
141 auto device = arm_compute::CLKernelLibrary::get().get_device();
142 m_CLCompileContext = arm_compute::CLCompileContext(context, device);
144 if (m_ModelContextPtr)
149 if (!(modelOptions->SaveCachedNetwork()))
152 auto cachedFd = modelOptions->GetCachedFileDescriptor();
155 struct stat statBuffer;
156 if (fstat(cachedFd, &statBuffer) == 0)
158 long dataSize =
static_cast<long>(statBuffer.st_size);
161 auto offset = lseek(cachedFd, 0, SEEK_CUR);
164 std::vector <uint8_t> compiledContextData(static_cast<unsigned int>(dataSize));
165 auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0);
171 compiledContextData);
179 if (filePath !=
"" && fs::exists(filePath) && fs::is_regular_file(filePath))
182 deserializer.
Deserialize(m_CLCompileContext, context, device, filePath);
189 : m_MemoryManager(memoryManager), m_ModelContextPtr(
IBackendInternal::IBackendSpecificModelContextPtr{})
191 InitializeCLCompileContext();
196 : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
198 InitializeCLCompileContext();
202 const bool IsMemoryManaged)
const 205 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo);
206 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
213 const bool IsMemoryManaged)
const 216 std::unique_ptr<ClTensorHandle> tensorHandle = std::make_unique<ClTensorHandle>(tensorInfo, dataLayout);
217 tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup());
224 unsigned int const* subTensorOrigin)
const 227 arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape);
234 coords.set(i, armnn::numeric_cast<int>(subTensorOrigin[revertedIndex]));
237 const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.
GetShape());
238 if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape))
243 return std::make_unique<ClSubTensorHandle>(
244 PolymorphicDowncast<IClTensorHandle*>(&parent), shape, coords);
255 auto activationQueueDescriptor = PolymorphicDowncast<const ActivationQueueDescriptor*>(&descriptor);
256 return MakeWorkload<ClActivationWorkload>(*activationQueueDescriptor,
info, m_CLCompileContext);
260 auto additionQueueDescriptor = PolymorphicDowncast<const AdditionQueueDescriptor*>(&descriptor);
261 return MakeWorkload<ClAdditionWorkload>(*additionQueueDescriptor,
info, m_CLCompileContext);
265 auto argMinMaxQueueDescriptor = PolymorphicDowncast<const ArgMinMaxQueueDescriptor*>(&descriptor);
266 return std::make_unique<ClArgMinMaxWorkload>(*argMinMaxQueueDescriptor,
info, m_CLCompileContext);
270 auto batchNormalizationQueueDescriptor
271 = PolymorphicDowncast<const BatchNormalizationQueueDescriptor*>(&descriptor);
272 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>
273 (*batchNormalizationQueueDescriptor,
info, m_CLCompileContext);
277 auto batchToSpaceNdQueueDescriptor
278 = PolymorphicDowncast<const BatchToSpaceNdQueueDescriptor*>(&descriptor);
279 return MakeWorkload<ClBatchToSpaceNdWorkload>(*batchToSpaceNdQueueDescriptor,
info, m_CLCompileContext);
283 auto castQueueDescriptor = PolymorphicDowncast<const CastQueueDescriptor*>(&descriptor);
284 return MakeWorkload<ClCastWorkload>(*castQueueDescriptor,
info, m_CLCompileContext);
288 auto channelShuffleQueueDescriptor
289 = PolymorphicDowncast<const ChannelShuffleQueueDescriptor*>(&descriptor);
290 return MakeWorkload<ClChannelShuffleWorkload>(*channelShuffleQueueDescriptor,
info, m_CLCompileContext);
294 auto comparisonQueueDescriptor = PolymorphicDowncast<const ComparisonQueueDescriptor*>(&descriptor);
295 return MakeWorkload<ClComparisonWorkload>(*comparisonQueueDescriptor,
info, m_CLCompileContext);
299 auto concatQueueDescriptor = PolymorphicDowncast<const ConcatQueueDescriptor*>(&descriptor);
300 return MakeWorkload<ClConcatWorkload>(*concatQueueDescriptor,
info, m_CLCompileContext);
304 auto constantQueueDescriptor = PolymorphicDowncast<const ConstantQueueDescriptor*>(&descriptor);
305 return MakeWorkload<ClConstantWorkload>(*constantQueueDescriptor,
info, m_CLCompileContext);
309 auto convertFp16ToFp32QueueDescriptor
310 = PolymorphicDowncast<const ConvertFp16ToFp32QueueDescriptor*>(&descriptor);
311 return MakeWorkload<ClConvertFp16ToFp32Workload>(*convertFp16ToFp32QueueDescriptor,
317 auto convertFp32ToFp16QueueDescriptor
318 = PolymorphicDowncast<const ConvertFp32ToFp16QueueDescriptor*>(&descriptor);
319 return MakeWorkload<ClConvertFp32ToFp16Workload>(*convertFp32ToFp16QueueDescriptor,
325 auto convolution2dQueueDescriptor = PolymorphicDowncast<const Convolution2dQueueDescriptor*>(&descriptor);
327 bool isFastMathEnabled =
false;
328 if (m_ModelContextPtr)
330 if (m_ModelContextPtr.get() !=
nullptr)
339 return MakeWorkload<ClConvolution2dWorkload>(*convolution2dQueueDescriptor,
341 m_MemoryManager->GetIntraLayerManager(),
347 auto convolution3dQueueDescriptor = PolymorphicDowncast<const Convolution3dQueueDescriptor*>(&descriptor);
349 bool isFastMathEnabled =
false;
350 if (m_ModelContextPtr)
352 if (m_ModelContextPtr.get() !=
nullptr)
361 return MakeWorkload<ClConvolution3dWorkload>(*convolution3dQueueDescriptor,
363 m_MemoryManager->GetIntraLayerManager(),
369 auto debugQueueDescriptor = PolymorphicDowncast<const DebugQueueDescriptor*>(&descriptor);
370 return MakeWorkload<NullWorkload, NullWorkload>(*debugQueueDescriptor,
info, m_CLCompileContext);
374 auto depthToSpaceQueueDescriptor = PolymorphicDowncast<const DepthToSpaceQueueDescriptor*>(&descriptor);
375 return MakeWorkload<ClDepthToSpaceWorkload>(*depthToSpaceQueueDescriptor,
info, m_CLCompileContext);
379 auto depthwiseConvolution2dQueueDescriptor
380 = PolymorphicDowncast<const DepthwiseConvolution2dQueueDescriptor*>(&descriptor);
381 return MakeWorkload<ClDepthwiseConvolutionWorkload>(*depthwiseConvolution2dQueueDescriptor,
387 auto dequantizeQueueDescriptor = PolymorphicDowncast<const DequantizeQueueDescriptor*>(&descriptor);
388 return MakeWorkload<ClDequantizeWorkload>(*dequantizeQueueDescriptor,
info, m_CLCompileContext);
392 auto detectionPostProcessQueueDescriptor
393 = PolymorphicDowncast<const DetectionPostProcessQueueDescriptor*>(&descriptor);
394 return MakeWorkload<NullWorkload, NullWorkload>(*detectionPostProcessQueueDescriptor,
400 auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
401 return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor,
info, m_CLCompileContext);
405 auto elementwiseUnaryQueueDescriptor
406 = PolymorphicDowncast<const ElementwiseUnaryQueueDescriptor*>(&descriptor);
408 switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation)
413 absQueueDescriptor.
m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
414 absQueueDescriptor.
m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
416 return std::make_unique<ClAbsWorkload>(absQueueDescriptor,
info, m_CLCompileContext);
419 return std::make_unique<ClExpWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
421 return std::make_unique<ClLogWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
423 return std::make_unique<ClLogicalNotWorkload>(*elementwiseUnaryQueueDescriptor,
427 return std::make_unique<ClNegWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
431 rsqrtQueueDescriptor.
m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs;
432 rsqrtQueueDescriptor.
m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs;
434 return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor,
info, m_CLCompileContext);
437 return std::make_unique<ClSinWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
439 return std::make_unique<ClSqrtWorkload>(*elementwiseUnaryQueueDescriptor,
info, m_CLCompileContext);
446 auto fillQueueDescriptor = PolymorphicDowncast<const FillQueueDescriptor*>(&descriptor);
447 return std::make_unique<ClFillWorkload>(*fillQueueDescriptor,
info, m_CLCompileContext);
451 auto floorQueueDescriptor = PolymorphicDowncast<const FloorQueueDescriptor*>(&descriptor);
452 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(*floorQueueDescriptor,
info, m_CLCompileContext);
456 auto fullyConnectedQueueDescriptor
457 = PolymorphicDowncast<const FullyConnectedQueueDescriptor*>(&descriptor);
458 return MakeWorkload<ClFullyConnectedWorkload>(*fullyConnectedQueueDescriptor,
460 m_MemoryManager->GetIntraLayerManager(),
465 auto gatherQueueDescriptor = PolymorphicDowncast<const GatherQueueDescriptor*>(&descriptor);
466 return MakeWorkload<ClGatherWorkload>(*gatherQueueDescriptor,
info, m_CLCompileContext);
470 auto gatherNdQueueDescriptor = PolymorphicDowncast<const GatherNdQueueDescriptor*>(&descriptor);
471 return MakeWorkload<ClGatherNdWorkload>(*gatherNdQueueDescriptor,
info, m_CLCompileContext);
475 auto inputQueueDescriptor = PolymorphicDowncast<const InputQueueDescriptor*>(&descriptor);
476 return std::make_unique<CopyMemGenericWorkload>(*inputQueueDescriptor,
info);
480 auto instanceNormalizationQueueDescriptor
481 = PolymorphicDowncast<const InstanceNormalizationQueueDescriptor*>(&descriptor);
482 return MakeWorkload<ClInstanceNormalizationWorkload>(*instanceNormalizationQueueDescriptor,
488 auto l2NormalizationQueueDescriptor
489 = PolymorphicDowncast<const L2NormalizationQueueDescriptor*>(&descriptor);
490 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(*l2NormalizationQueueDescriptor,
496 auto logicalBinaryQueueDescriptor = PolymorphicDowncast<const LogicalBinaryQueueDescriptor*>(&descriptor);
498 switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation)
501 return std::make_unique<ClLogicalAndWorkload>(*logicalBinaryQueueDescriptor,
505 return std::make_unique<ClLogicalOrWorkload>(*logicalBinaryQueueDescriptor,
514 auto logSoftmaxQueueDescriptor = PolymorphicDowncast<const LogSoftmaxQueueDescriptor*>(&descriptor);
516 return MakeWorkload<ClLogSoftmaxWorkload>(*logSoftmaxQueueDescriptor,
518 m_MemoryManager->GetIntraLayerManager(),
523 auto lstmQueueDescriptor = PolymorphicDowncast<const LstmQueueDescriptor*>(&descriptor);
524 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(*lstmQueueDescriptor,
info, m_CLCompileContext);
528 auto maximumQueueDescriptor = PolymorphicDowncast<const MaximumQueueDescriptor*>(&descriptor);
529 return MakeWorkload<ClMaximumWorkload>(*maximumQueueDescriptor,
info, m_CLCompileContext);
533 auto meanQueueDescriptor = PolymorphicDowncast<const MeanQueueDescriptor*>(&descriptor);
534 return MakeWorkload<ClMeanWorkload>(*meanQueueDescriptor,
info, m_CLCompileContext);
538 auto memCopyQueueDescriptor = PolymorphicDowncast<const MemCopyQueueDescriptor*>(&descriptor);
539 if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0])
543 return MakeWorkload<CopyMemGenericWorkload>(*memCopyQueueDescriptor,
info);
547 auto memImportQueueDescriptor = PolymorphicDowncast<const MemImportQueueDescriptor*>(&descriptor);
548 if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0])
552 return std::make_unique<ImportMemGenericWorkload>(*memImportQueueDescriptor,
info);
556 auto minimumQueueDescriptor = PolymorphicDowncast<const MinimumQueueDescriptor*>(&descriptor);
557 return MakeWorkload<ClMinimumWorkload>(*minimumQueueDescriptor,
info, m_CLCompileContext);
561 auto multiplicationQueueDescriptor = PolymorphicDowncast<const MultiplicationQueueDescriptor*>(&descriptor);
562 return MakeWorkload<ClMultiplicationWorkload>(*multiplicationQueueDescriptor,
info, m_CLCompileContext);
566 auto normalizationQueueDescriptor = PolymorphicDowncast<const NormalizationQueueDescriptor*>(&descriptor);
567 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(*normalizationQueueDescriptor,
573 auto outputQueueDescriptor = PolymorphicDowncast<const OutputQueueDescriptor*>(&descriptor);
574 return std::make_unique<CopyMemGenericWorkload>(*outputQueueDescriptor,
info);
578 auto padQueueDescriptor = PolymorphicDowncast<const PadQueueDescriptor*>(&descriptor);
579 return MakeWorkload<ClPadWorkload>(*padQueueDescriptor,
info, m_CLCompileContext);
583 auto permuteQueueDescriptor = PolymorphicDowncast<const PermuteQueueDescriptor*>(&descriptor);
584 return MakeWorkload<ClPermuteWorkload>(*permuteQueueDescriptor,
info, m_CLCompileContext);
588 auto pooling2dQueueDescriptor = PolymorphicDowncast<const Pooling2dQueueDescriptor*>(&descriptor);
589 return MakeWorkload<ClPooling2dWorkload>(*pooling2dQueueDescriptor,
info, m_CLCompileContext);
593 auto pooling3dQueueDescriptor = PolymorphicDowncast<const Pooling3dQueueDescriptor*>(&descriptor);
594 return MakeWorkload<ClPooling3dWorkload>(*pooling3dQueueDescriptor,
info, m_CLCompileContext);
598 auto preCompiledQueueDescriptor = PolymorphicDowncast<const PreCompiledQueueDescriptor*>(&descriptor);
599 return MakeWorkload<NullWorkload, NullWorkload>(*preCompiledQueueDescriptor,
info, m_CLCompileContext);
603 auto preluQueueDescriptor = PolymorphicDowncast<const PreluQueueDescriptor*>(&descriptor);
604 return MakeWorkload<ClPreluWorkload>(*preluQueueDescriptor,
info, m_CLCompileContext);
608 auto qLstmQueueDescriptor = PolymorphicDowncast<const QLstmQueueDescriptor*>(&descriptor);
609 return std::make_unique<ClQLstmWorkload>(*qLstmQueueDescriptor,
info, m_CLCompileContext);
613 auto quantizeQueueDescriptor = PolymorphicDowncast<const QuantizeQueueDescriptor*>(&descriptor);
614 return MakeWorkload<ClQuantizeWorkload>(*quantizeQueueDescriptor,
info, m_CLCompileContext);
618 auto quantizedLstmQueueDescriptor = PolymorphicDowncast<const QuantizedLstmQueueDescriptor*>(&descriptor);
619 return MakeWorkload<ClQuantizedLstmWorkload>(*quantizedLstmQueueDescriptor,
info, m_CLCompileContext);
623 auto rankQueueDescriptor = PolymorphicDowncast<const RankQueueDescriptor*>(&descriptor);
624 return std::make_unique<ClRankWorkload>(*rankQueueDescriptor,
info);
628 auto reduceQueueDescriptor = PolymorphicDowncast<const ReduceQueueDescriptor*>(&descriptor);
629 return std::make_unique<ClReduceWorkload>(*reduceQueueDescriptor,
info);
633 auto reshapeQueueDescriptor = PolymorphicDowncast<const ReshapeQueueDescriptor*>(&descriptor);
634 return MakeWorkload<ClReshapeWorkload>(*reshapeQueueDescriptor,
info, m_CLCompileContext);
638 auto resizeQueueDescriptor = PolymorphicDowncast<const ResizeQueueDescriptor*>(&descriptor);
639 return MakeWorkload<ClResizeWorkload>(*resizeQueueDescriptor,
info, m_CLCompileContext);
643 auto sliceQueueDescriptor = PolymorphicDowncast<const SliceQueueDescriptor*>(&descriptor);
644 return MakeWorkload<ClSliceWorkload>(*sliceQueueDescriptor,
info, m_CLCompileContext);
648 auto softmaxQueueDescriptor = PolymorphicDowncast<const SoftmaxQueueDescriptor*>(&descriptor);
649 return std::make_unique<ClSoftmaxWorkload>(*softmaxQueueDescriptor,
651 m_MemoryManager->GetIntraLayerManager(),
656 auto spaceToBatchNdQueueDescriptor
657 = PolymorphicDowncast<const SpaceToBatchNdQueueDescriptor*>(&descriptor);
658 return MakeWorkload<ClSpaceToBatchNdWorkload>(*spaceToBatchNdQueueDescriptor,
info, m_CLCompileContext);
662 auto spaceToDepthQueueDescriptor = PolymorphicDowncast<const SpaceToDepthQueueDescriptor*>(&descriptor);
663 return MakeWorkload<ClSpaceToDepthWorkload>(*spaceToDepthQueueDescriptor,
info, m_CLCompileContext);
667 auto splitterQueueDescriptor = PolymorphicDowncast<const SplitterQueueDescriptor*>(&descriptor);
668 return MakeWorkload<ClSplitterWorkload>(*splitterQueueDescriptor,
info, m_CLCompileContext);
672 auto stackQueueDescriptor = PolymorphicDowncast<const StackQueueDescriptor*>(&descriptor);
673 return MakeWorkload<ClStackWorkload>(*stackQueueDescriptor,
info, m_CLCompileContext);
677 auto stridedSliceQueueDescriptor = PolymorphicDowncast<const StridedSliceQueueDescriptor*>(&descriptor);
678 return MakeWorkload<ClStridedSliceWorkload>(*stridedSliceQueueDescriptor,
info, m_CLCompileContext);
682 auto subtractionQueueDescriptor = PolymorphicDowncast<const SubtractionQueueDescriptor*>(&descriptor);
683 return MakeWorkload<ClSubtractionWorkload>(*subtractionQueueDescriptor,
info, m_CLCompileContext);
687 auto transposeQueueDescriptor = PolymorphicDowncast<const TransposeQueueDescriptor*>(&descriptor);
688 return MakeWorkload<ClTransposeWorkload>(*transposeQueueDescriptor,
info, m_CLCompileContext);
692 auto transposeConvolution2dQueueDescriptor
693 = PolymorphicDowncast<const TransposeConvolution2dQueueDescriptor*>(&descriptor);
694 return MakeWorkload<ClTransposeConvolution2dWorkload>(*transposeConvolution2dQueueDescriptor,
696 m_MemoryManager->GetIntraLayerManager(),
701 auto desc = PolymorphicDowncast<const UnidirectionalSequenceLstmQueueDescriptor*>(&descriptor);
702 return MakeWorkloadHelper<ClUnidirectionalSequenceLstmFloatWorkload, NullWorkload>(*desc,
714 return MakeWorkload<ClActivationWorkload>(descriptor,
info, m_CLCompileContext);
720 return MakeWorkload<ClAdditionWorkload>(descriptor,
info, m_CLCompileContext);
726 return std::make_unique<ClArgMinMaxWorkload>(descriptor,
info, m_CLCompileContext);
733 return MakeWorkload<ClBatchNormalizationFloatWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
739 return MakeWorkload<ClBatchToSpaceNdWorkload>(descriptor,
info, m_CLCompileContext);
742 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateCast(
const CastQueueDescriptor& descriptor,
745 return MakeWorkload<ClCastWorkload>(descriptor,
info, m_CLCompileContext);
751 return MakeWorkload<ClChannelShuffleWorkload>(descriptor,
info, m_CLCompileContext);
757 return MakeWorkload<ClComparisonWorkload>(descriptor,
info, m_CLCompileContext);
763 return MakeWorkload<ClConcatWorkload>(descriptor,
info, m_CLCompileContext);
769 return MakeWorkload<ClConstantWorkload>(descriptor,
info, m_CLCompileContext);
776 return MakeWorkload<ClConvertFp16ToFp32Workload>(descriptor,
info, m_CLCompileContext);
783 return MakeWorkload<ClConvertFp32ToFp16Workload>(descriptor,
info, m_CLCompileContext);
789 bool isFastMathEnabled =
false;
790 if (m_ModelContextPtr)
792 if (m_ModelContextPtr.get() !=
nullptr)
801 return MakeWorkload<ClConvolution2dWorkload>(descriptor,
803 m_MemoryManager->GetIntraLayerManager(),
811 bool isFastMathEnabled =
false;
812 if (m_ModelContextPtr)
814 if (m_ModelContextPtr.get() !=
nullptr)
823 return MakeWorkload<ClConvolution3dWorkload>(descriptor,
825 m_MemoryManager->GetIntraLayerManager(),
833 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
839 return MakeWorkload<ClDepthToSpaceWorkload>(descriptor,
info, m_CLCompileContext);
846 return MakeWorkload<ClDepthwiseConvolutionWorkload>(descriptor,
info, m_CLCompileContext);
852 return MakeWorkload<ClDequantizeWorkload>(descriptor,
info, m_CLCompileContext);
859 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
865 return std::make_unique<ClDivisionWorkload>(descriptor,
info, m_CLCompileContext);
879 return std::make_unique<ClAbsWorkload>(absQueueDescriptor,
info, m_CLCompileContext);
882 return std::make_unique<ClExpWorkload>(descriptor,
info, m_CLCompileContext);
884 return std::make_unique<ClLogWorkload>(descriptor,
info, m_CLCompileContext);
886 return std::make_unique<ClLogicalNotWorkload>(descriptor,
info, m_CLCompileContext);
888 return std::make_unique<ClNegWorkload>(descriptor,
info, m_CLCompileContext);
892 rsqrtQueueDescriptor.
m_Inputs = descriptor.m_Inputs;
893 rsqrtQueueDescriptor.
m_Outputs = descriptor.m_Outputs;
895 return std::make_unique<ClRsqrtWorkload>(rsqrtQueueDescriptor,
info, m_CLCompileContext);
898 return std::make_unique<ClSinWorkload>(descriptor,
info, m_CLCompileContext);
904 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateFill(
const FillQueueDescriptor& descriptor,
907 return std::make_unique<ClFillWorkload>(descriptor,
info, m_CLCompileContext);
913 return MakeWorkload<ClFloorFloatWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
919 return MakeWorkload<ClFullyConnectedWorkload>(descriptor,
921 m_MemoryManager->GetIntraLayerManager(),
928 return MakeWorkload<ClGatherWorkload>(descriptor,
info, m_CLCompileContext);
934 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
941 return MakeWorkload<ClInstanceNormalizationWorkload>(descriptor,
info, m_CLCompileContext);
947 return MakeWorkload<ClL2NormalizationFloatWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
956 return std::make_unique<ClLogicalAndWorkload>(descriptor,
info, m_CLCompileContext);
958 return std::make_unique<ClLogicalOrWorkload>(descriptor,
info, m_CLCompileContext);
967 return MakeWorkload<ClLogSoftmaxWorkload>(descriptor,
969 m_MemoryManager->GetIntraLayerManager(),
976 return MakeWorkload<ClLstmFloatWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
982 return MakeWorkload<ClMaximumWorkload>(descriptor,
info, m_CLCompileContext);
988 return MakeWorkload<ClMeanWorkload>(descriptor,
info, m_CLCompileContext);
999 return MakeWorkload<CopyMemGenericWorkload>(descriptor,
info);
1010 return std::make_unique<ImportMemGenericWorkload>(descriptor,
info);
1016 return MakeWorkload<ClMinimumWorkload>(descriptor,
info, m_CLCompileContext);
1022 return MakeWorkload<ClMultiplicationWorkload>(descriptor,
info, m_CLCompileContext);
1028 return MakeWorkload<ClNormalizationFloatWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
1034 return std::make_unique<CopyMemGenericWorkload>(descriptor,
info);
1040 return MakeWorkload<ClPadWorkload>(descriptor,
info, m_CLCompileContext);
1046 return MakeWorkload<ClPermuteWorkload>(descriptor,
info, m_CLCompileContext);
1052 return MakeWorkload<ClPooling2dWorkload>(descriptor,
info, m_CLCompileContext);
1058 return MakeWorkload<NullWorkload, NullWorkload>(descriptor,
info, m_CLCompileContext);
1064 return MakeWorkload<ClPreluWorkload>(descriptor,
info, m_CLCompileContext);
1067 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateQLstm(
const QLstmQueueDescriptor& descriptor,
1070 return std::make_unique<ClQLstmWorkload>(descriptor,
info, m_CLCompileContext);
1076 return MakeWorkload<ClQuantizeWorkload>(descriptor,
info, m_CLCompileContext);
1082 return MakeWorkload<ClQuantizedLstmWorkload>(descriptor,
info, m_CLCompileContext);
1088 return std::make_unique<ClRankWorkload>(descriptor,
info);
1094 return std::make_unique<ClReduceWorkload>(descriptor,
info);
1100 return MakeWorkload<ClReshapeWorkload>(descriptor,
info, m_CLCompileContext);
1106 return MakeWorkload<ClResizeWorkload>(descriptor,
info, m_CLCompileContext);
1112 return MakeWorkload<ClSliceWorkload>(descriptor,
info, m_CLCompileContext);
1118 return std::make_unique<ClSoftmaxWorkload>(descriptor,
1120 m_MemoryManager->GetIntraLayerManager(),
1121 m_CLCompileContext);
1127 return MakeWorkload<ClSpaceToBatchNdWorkload>(descriptor,
info, m_CLCompileContext);
1133 return MakeWorkload<ClSpaceToDepthWorkload>(descriptor,
info, m_CLCompileContext);
1139 return MakeWorkload<ClSplitterWorkload>(descriptor,
info, m_CLCompileContext);
1145 return MakeWorkload<ClStackWorkload>(descriptor,
info, m_CLCompileContext);
1151 return MakeWorkload<ClStridedSliceWorkload>(descriptor,
info, m_CLCompileContext);
1157 return MakeWorkload<ClSubtractionWorkload>(descriptor,
info, m_CLCompileContext);
1163 return MakeWorkload<ClTransposeWorkload>(descriptor,
info, m_CLCompileContext);
1170 return MakeWorkload<ClTransposeConvolution2dWorkload>(descriptor,
1172 m_MemoryManager->GetIntraLayerManager(),
1173 m_CLCompileContext);
std::unique_ptr< IWorkload > CreateSubtraction(const SubtractionQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< ITensorHandle > CreateSubTensorHandle(ITensorHandle &parent, TensorShape const &subTensorShape, unsigned int const *subTensorOrigin) const override
std::unique_ptr< IWorkload > CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor &descriptor, const WorkloadInfo &info) const override
ClWorkloadFactory(const std::shared_ptr< ClMemoryManager > &memoryManager)
UnaryOperation m_Operation
Specifies the elementwiseUnary operation to execute.
std::unique_ptr< IWorkload > CreateStridedSlice(const StridedSliceQueueDescriptor &, const WorkloadInfo &) const override
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
std::unique_ptr< IWorkload > CreateInput(const InputQueueDescriptor &, const WorkloadInfo &) const override
void AfterWorkloadsCreated() override
std::unique_ptr< IWorkload > CreateL2Normalization(const L2NormalizationQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateBatchNormalization(const BatchNormalizationQueueDescriptor &, const WorkloadInfo &) const override
constexpr const char * ClBackendId()
std::vector< BackendOptions > ModelOptions
std::unique_ptr< IWorkload > CreateSoftmax(const SoftmaxQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreatePooling2d(const Pooling2dQueueDescriptor &, const WorkloadInfo &) const override
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
std::unique_ptr< IWorkload > CreateMean(const MeanQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreatePad(const PadQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< ITensorHandle > CreateTensorHandle(const TensorInfo &tensorInfo, const bool IsMemoryManaged=true) const override
std::unique_ptr< IWorkload > CreateLstm(const LstmQueueDescriptor &, const WorkloadInfo &) const override
const BackendId & GetBackendId() const override
std::unique_ptr< IWorkload > CreateDepthToSpace(const DepthToSpaceQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateLogSoftmax(const LogSoftmaxQueueDescriptor &, const WorkloadInfo &) const override
#define ARMNN_LOG(severity)
std::unique_ptr< IWorkload > CreateComparison(const ComparisonQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateTranspose(const TransposeQueueDescriptor &, const WorkloadInfo &) const override
bool SaveSerializedToStream(std::ostream &stream)
Serializes the ClContext to the stream.
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
TypedWorkload< QueueDescriptor, armnn::DataType::Float16, armnn::DataType::Float32 > FloatWorkload
std::unique_ptr< IWorkload > CreateDequantize(const DequantizeQueueDescriptor &, const WorkloadInfo &) const override
LayerDescriptor m_Parameters
std::unique_ptr< IWorkload > CreateSplitter(const SplitterQueueDescriptor &, const WorkloadInfo &) const override
LogicalBinaryOperation m_Operation
Specifies the logical operation to execute.
std::unique_ptr< IWorkload > CreateConvertFp32ToFp16(const ConvertFp32ToFp16QueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateNormalization(const NormalizationQueueDescriptor &, const WorkloadInfo &) const override
void DeserializeFromBinary(arm_compute::CLCompileContext &clCompileContext, cl::Context &context, cl::Device &device, const std::vector< uint8_t > &binaryContent)
Deserializes the CLCompileContext built-in programs from binary file contents.
std::unique_ptr< IWorkload > CreateDepthwiseConvolution2d(const DepthwiseConvolution2dQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateInstanceNormalization(const InstanceNormalizationQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateTransposeConvolution2d(const TransposeConvolution2dQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateSpaceToDepth(const SpaceToDepthQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateConcat(const ConcatQueueDescriptor &, const WorkloadInfo &) const override
static bool IsLayerSupported(const Layer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateWorkload(LayerType type, const QueueDescriptor &descriptor, const WorkloadInfo &info) const override
std::unique_ptr< IWorkload > CreatePrelu(const PreluQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateArgMinMax(const ArgMinMaxQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreatePermute(const PermuteQueueDescriptor &, const WorkloadInfo &) const override
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::unique_ptr< IWorkload > CreateDebug(const DebugQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateConstant(const ConstantQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateConvolution2d(const Convolution2dQueueDescriptor &, const WorkloadInfo &) const override
void Serialize(const arm_compute::CLCompileContext &clCompileContext)
Serializes the CLCompileContext built-in programs.
std::unique_ptr< IWorkload > CreateFullyConnected(const FullyConnectedQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateMinimum(const MinimumQueueDescriptor &, const WorkloadInfo &) const override
RuntimeException WrapClError(const cl::Error &clError, const CheckLocation &location)
bool IsFastMathEnabled() const
std::unique_ptr< IWorkload > CreateReshape(const ReshapeQueueDescriptor &, const WorkloadInfo &) const override
static bool IsLayerSupported(const BackendId &backendId, const IConnectableLayer &layer, Optional< DataType > dataType, std::string &outReasonIfUnsupported)
std::unique_ptr< IWorkload > CreateFloor(const FloorQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateRank(const RankQueueDescriptor &, const WorkloadInfo &) const override
virtual TensorShape GetShape() const =0
Get the number of elements for each dimension, ordered from the slowest iterating dimension to the fastest iterating dimension.
std::unique_ptr< IWorkload > CreateDetectionPostProcess(const DetectionPostProcessQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateQuantize(const QuantizeQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateAddition(const AdditionQueueDescriptor &, const WorkloadInfo &) const override
void Deserialize(arm_compute::CLCompileContext &clCompileContext, cl::Context &context, cl::Device &device, const std::string &filePath)
Deserializes the CLCompileContext built-in programs from a binary file.
TypedWorkload< QueueDescriptor, armnn::DataType::QAsymmU8 > Uint8Workload
std::unique_ptr< IWorkload > CreateGather(const GatherQueueDescriptor &, const WorkloadInfo &) const override
std::vector< ITensorHandle * > m_Outputs
std::unique_ptr< IWorkload > CreateDivision(const DivisionQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreatePreCompiled(const PreCompiledQueueDescriptor &, const WorkloadInfo &) const override
unsigned int GetNumDimensions() const
Function that returns the tensor rank.
std::unique_ptr< IWorkload > CreateStack(const StackQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateOutput(const OutputQueueDescriptor &, const WorkloadInfo &) const override
Contains information about TensorInfos of a layer.
std::unique_ptr< IWorkload > CreateResize(const ResizeQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateSlice(const SliceQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateConvertFp16ToFp32(const ConvertFp16ToFp32QueueDescriptor &, const WorkloadInfo &) const override
std::vector< ITensorHandle * > m_Inputs
std::unique_ptr< IWorkload > CreateMultiplication(const MultiplicationQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateMaximum(const MaximumQueueDescriptor &, const WorkloadInfo &) const override
std::unique_ptr< IWorkload > CreateActivation(const ActivationQueueDescriptor &, const WorkloadInfo &) const override
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
std::unique_ptr< IWorkload > CreateMemImport(const MemImportQueueDescriptor &, const WorkloadInfo &) const override
std::string GetCachedNetworkFilePath() const
std::unique_ptr< IWorkload > CreateMemCopy(const MemCopyQueueDescriptor &, const WorkloadInfo &) const override
Depthwise Convolution 2D layer workload data.
std::unique_ptr< IWorkload > CreateQuantizedLstm(const QuantizedLstmQueueDescriptor &, const WorkloadInfo &) const override
LayerType
When adding a new layer, also adapt the LastLayer enum value in the enum class LayerType below.