// // Copyright © 2017 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ClWorkloadFactory.hpp" #include "ClBackendId.hpp" #include "ClBackendModelContext.hpp" #include "ClContextDeserializer.hpp" #include "ClContextSerializer.hpp" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include namespace armnn { namespace { static const BackendId s_Id{ClBackendId()}; } bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, Optional dataType, std::string& outReasonIfUnsupported) { return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); } bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer, Optional dataType, std::string& outReasonIfUnsupported, const ModelOptions& modelOptions) { return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions); } const BackendId& ClWorkloadFactory::GetBackendId() const { return s_Id; } void ClWorkloadFactory::AfterWorkloadsCreated() { if(m_ModelContextPtr) { auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); if (modelOptions->SaveCachedNetwork()) { ClContextSerializer serializer; serializer.Serialize(m_CLCompileContext); auto cachedFd = modelOptions->GetCachedFileDescriptor(); if (cachedFd != -1) { std::vector compiledContextData; std::stringstream stream; bool serialized = serializer.SaveSerializedToStream(stream); if (serialized) { std::string const serializedString{stream.str()}; std::copy(serializedString.begin(), serializedString.end(), std::back_inserter(compiledContextData)); auto success = write(cachedFd, compiledContextData.data(), compiledContextData.size()); if (success == -1) { ARMNN_LOG(info) << "ClWorkloadFactory:: Could not cache the compiled context!"; } } } // Save map to a filepath provided in ModelOptions auto filePath = modelOptions->GetCachedNetworkFilePath(); if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath)) { // Serialize ClContext to the file specified std::ofstream file(filePath, std::ios::out | std::ios::binary); serializer.SaveSerializedToStream(file); } } } } template std::unique_ptr ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) { try { return MakeWorkloadHelper(descriptor, info, std::forward(args)...); } catch (const cl::Error& clError) { throw WrapClError(clError, CHECK_LOCATION()); } } template std::unique_ptr ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor, const WorkloadInfo& info, Args&&... args) { try { return std::make_unique(descriptor, info, std::forward(args)...); } catch (const cl::Error& clError) { throw WrapClError(clError, CHECK_LOCATION()); } } void ClWorkloadFactory::InitializeCLCompileContext() { // Initialize our m_CLCompileContext using default device and context auto context = arm_compute::CLKernelLibrary::get().context(); auto device = arm_compute::CLKernelLibrary::get().get_device(); m_CLCompileContext = arm_compute::CLCompileContext(context, device); if (m_ModelContextPtr) { // Load saved programs if the user has set a filepath auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); auto filePath = modelOptions->GetCachedNetworkFilePath(); if (!(modelOptions->SaveCachedNetwork())) { ClContextDeserializer deserializer; auto cachedFd = modelOptions->GetCachedFileDescriptor(); if (cachedFd != -1) { struct stat statBuffer; if (fstat(cachedFd, &statBuffer) == 0) { long dataSize = static_cast(statBuffer.st_size); if( dataSize > 0) { auto offset = lseek(cachedFd, 0, SEEK_CUR); if (offset == 0) { std::vector compiledContextData(static_cast(dataSize)); auto success = pread(cachedFd, compiledContextData.data(), compiledContextData.size(), 0); if (success != -1) { deserializer.DeserializeFromBinary(m_CLCompileContext, context, device, compiledContextData); } } } } } if (filePath != "" && fs::exists(filePath) && fs::is_regular_file(filePath)) { // Deserialize binary file and load into m_CLCompileContext deserializer.Deserialize(m_CLCompileContext, context, device, filePath); } } } } ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr& memoryManager) : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{}) { InitializeCLCompileContext(); } ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr& memoryManager, const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr) : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr) { InitializeCLCompileContext(); } std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, const bool IsMemoryManaged) const { IgnoreUnused(IsMemoryManaged); std::unique_ptr tensorHandle = std::make_unique(tensorInfo); tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); return tensorHandle; } std::unique_ptr ClWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout, const bool IsMemoryManaged) const { IgnoreUnused(IsMemoryManaged); std::unique_ptr tensorHandle = std::make_unique(tensorInfo, dataLayout); tensorHandle->SetMemoryGroup(m_MemoryManager->GetInterLayerMemoryGroup()); return tensorHandle; } std::unique_ptr ClWorkloadFactory::CreateSubTensorHandle(ITensorHandle& parent, TensorShape const& subTensorShape, unsigned int const* subTensorOrigin) const { arm_compute::Coordinates coords; arm_compute::TensorShape shape = armcomputetensorutils::BuildArmComputeTensorShape(subTensorShape); coords.set_num_dimensions(subTensorShape.GetNumDimensions()); for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) { // Arm compute indexes tensor coords in reverse order. unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; coords.set(i, armnn::numeric_cast(subTensorOrigin[revertedIndex])); } const arm_compute::TensorShape parentShape = armcomputetensorutils::BuildArmComputeTensorShape(parent.GetShape()); if (!::arm_compute::error_on_invalid_subtensor(__func__, __FILE__, __LINE__, parentShape, coords, shape)) { return nullptr; } return std::make_unique( PolymorphicDowncast(&parent), shape, coords); } std::unique_ptr ClWorkloadFactory::CreateWorkload(LayerType type, const QueueDescriptor& descriptor, const WorkloadInfo& info) const { switch(type) { case LayerType::Activation : { auto activationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*activationQueueDescriptor, info, m_CLCompileContext); } case LayerType::Addition : { auto additionQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*additionQueueDescriptor, info, m_CLCompileContext); } case LayerType::ArgMinMax : { auto argMinMaxQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*argMinMaxQueueDescriptor, info, m_CLCompileContext); } case LayerType::BatchNormalization : { auto batchNormalizationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload (*batchNormalizationQueueDescriptor, info, m_CLCompileContext); } case LayerType::BatchToSpaceNd : { auto batchToSpaceNdQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*batchToSpaceNdQueueDescriptor, info, m_CLCompileContext); } case LayerType::Cast : { auto castQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*castQueueDescriptor, info, m_CLCompileContext); } case LayerType::ChannelShuffle : { auto channelShuffleQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*channelShuffleQueueDescriptor, info, m_CLCompileContext); } case LayerType::Comparison : { auto comparisonQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*comparisonQueueDescriptor, info, m_CLCompileContext); } case LayerType::Concat : { auto concatQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*concatQueueDescriptor, info, m_CLCompileContext); } case LayerType::Constant : { auto constantQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*constantQueueDescriptor, info, m_CLCompileContext); } case LayerType::ConvertFp16ToFp32 : { auto convertFp16ToFp32QueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*convertFp16ToFp32QueueDescriptor, info, m_CLCompileContext); } case LayerType::ConvertFp32ToFp16 : { auto convertFp32ToFp16QueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*convertFp32ToFp16QueueDescriptor, info, m_CLCompileContext); } case LayerType::Convolution2d : { auto convolution2dQueueDescriptor = PolymorphicDowncast(&descriptor); bool isFastMathEnabled = false; if (m_ModelContextPtr) { if (m_ModelContextPtr.get() != nullptr) { auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); if (modelOptions) { isFastMathEnabled = modelOptions->IsFastMathEnabled(); } } } return MakeWorkload(*convolution2dQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext, isFastMathEnabled); } case LayerType::Convolution3d : { auto convolution3dQueueDescriptor = PolymorphicDowncast(&descriptor); bool isFastMathEnabled = false; if (m_ModelContextPtr) { if (m_ModelContextPtr.get() != nullptr) { auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); if (modelOptions) { isFastMathEnabled = modelOptions->IsFastMathEnabled(); } } } return MakeWorkload(*convolution3dQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext, isFastMathEnabled); } case LayerType::Debug : { auto debugQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*debugQueueDescriptor, info, m_CLCompileContext); } case LayerType::DepthToSpace : { auto depthToSpaceQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*depthToSpaceQueueDescriptor, info, m_CLCompileContext); } case LayerType::DepthwiseConvolution2d : { auto depthwiseConvolution2dQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*depthwiseConvolution2dQueueDescriptor, info, m_CLCompileContext); } case LayerType::Dequantize : { auto dequantizeQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*dequantizeQueueDescriptor, info, m_CLCompileContext); } case LayerType::DetectionPostProcess : { auto detectionPostProcessQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*detectionPostProcessQueueDescriptor, info, m_CLCompileContext); } case LayerType::Division : { auto divisionQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*divisionQueueDescriptor, info, m_CLCompileContext); } case LayerType::ElementwiseUnary : { auto elementwiseUnaryQueueDescriptor = PolymorphicDowncast(&descriptor); switch(elementwiseUnaryQueueDescriptor->m_Parameters.m_Operation) { case UnaryOperation::Abs: { AbsQueueDescriptor absQueueDescriptor; absQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs; absQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs; return std::make_unique(absQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::Exp: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); case UnaryOperation::Log: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); case UnaryOperation::LogicalNot: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); case UnaryOperation::Neg: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); case UnaryOperation::Rsqrt: { RsqrtQueueDescriptor rsqrtQueueDescriptor; rsqrtQueueDescriptor.m_Inputs = elementwiseUnaryQueueDescriptor->m_Inputs; rsqrtQueueDescriptor.m_Outputs = elementwiseUnaryQueueDescriptor->m_Outputs; return std::make_unique(rsqrtQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::Sin: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); case UnaryOperation::Sqrt: return std::make_unique(*elementwiseUnaryQueueDescriptor, info, m_CLCompileContext); default: return nullptr; } } case LayerType::Fill : { auto fillQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*fillQueueDescriptor, info, m_CLCompileContext); } case LayerType::Floor : { auto floorQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*floorQueueDescriptor, info, m_CLCompileContext); } case LayerType::FullyConnected : { auto fullyConnectedQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*fullyConnectedQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } case LayerType::Gather : { auto gatherQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*gatherQueueDescriptor, info, m_CLCompileContext); } case LayerType::GatherNd : { auto gatherNdQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*gatherNdQueueDescriptor, info, m_CLCompileContext); } case LayerType::Input : { auto inputQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*inputQueueDescriptor, info); } case LayerType::InstanceNormalization : { auto instanceNormalizationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*instanceNormalizationQueueDescriptor, info, m_CLCompileContext); } case LayerType::L2Normalization : { auto l2NormalizationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*l2NormalizationQueueDescriptor, info, m_CLCompileContext); } case LayerType::LogicalBinary : { auto logicalBinaryQueueDescriptor = PolymorphicDowncast(&descriptor); switch(logicalBinaryQueueDescriptor->m_Parameters.m_Operation) { case LogicalBinaryOperation::LogicalAnd: return std::make_unique(*logicalBinaryQueueDescriptor, info, m_CLCompileContext); case LogicalBinaryOperation::LogicalOr: return std::make_unique(*logicalBinaryQueueDescriptor, info, m_CLCompileContext); default: return nullptr; } } case LayerType::LogSoftmax : { auto logSoftmaxQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*logSoftmaxQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } case LayerType::Lstm : { auto lstmQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*lstmQueueDescriptor, info, m_CLCompileContext); } case LayerType::Maximum : { auto maximumQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*maximumQueueDescriptor, info, m_CLCompileContext); } case LayerType::Mean : { auto meanQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*meanQueueDescriptor, info, m_CLCompileContext); } case LayerType::MemCopy : { auto memCopyQueueDescriptor = PolymorphicDowncast(&descriptor); if (memCopyQueueDescriptor->m_Inputs.empty() || !memCopyQueueDescriptor->m_Inputs[0]) { throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); } return MakeWorkload(*memCopyQueueDescriptor, info); } case LayerType::MemImport : { auto memImportQueueDescriptor = PolymorphicDowncast(&descriptor); if (memImportQueueDescriptor->m_Inputs.empty() || !memImportQueueDescriptor->m_Inputs[0]) { throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload"); } return std::make_unique(*memImportQueueDescriptor, info); } case LayerType::Minimum : { auto minimumQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*minimumQueueDescriptor, info, m_CLCompileContext); } case LayerType::Multiplication : { auto multiplicationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*multiplicationQueueDescriptor, info, m_CLCompileContext); } case LayerType::Normalization : { auto normalizationQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*normalizationQueueDescriptor, info, m_CLCompileContext); } case LayerType::Output : { auto outputQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*outputQueueDescriptor, info); } case LayerType::Pad : { auto padQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*padQueueDescriptor, info, m_CLCompileContext); } case LayerType::Permute : { auto permuteQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*permuteQueueDescriptor, info, m_CLCompileContext); } case LayerType::Pooling2d : { auto pooling2dQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*pooling2dQueueDescriptor, info, m_CLCompileContext); } case LayerType::Pooling3d : { auto pooling3dQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*pooling3dQueueDescriptor, info, m_CLCompileContext); } case LayerType::PreCompiled : { auto preCompiledQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*preCompiledQueueDescriptor, info, m_CLCompileContext); } case LayerType::Prelu : { auto preluQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*preluQueueDescriptor, info, m_CLCompileContext); } case LayerType::QLstm : { auto qLstmQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*qLstmQueueDescriptor, info, m_CLCompileContext); } case LayerType::Quantize : { auto quantizeQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*quantizeQueueDescriptor, info, m_CLCompileContext); } case LayerType::QuantizedLstm : { auto quantizedLstmQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*quantizedLstmQueueDescriptor, info, m_CLCompileContext); } case LayerType::Rank : { auto rankQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*rankQueueDescriptor, info); } case LayerType::Reduce : { auto reduceQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*reduceQueueDescriptor, info); } case LayerType::Reshape : { auto reshapeQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*reshapeQueueDescriptor, info, m_CLCompileContext); } case LayerType::Resize : { auto resizeQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*resizeQueueDescriptor, info, m_CLCompileContext); } case LayerType::Slice : { auto sliceQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*sliceQueueDescriptor, info, m_CLCompileContext); } case LayerType::Softmax : { auto softmaxQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*softmaxQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } case LayerType::SpaceToBatchNd : { auto spaceToBatchNdQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*spaceToBatchNdQueueDescriptor, info, m_CLCompileContext); } case LayerType::SpaceToDepth : { auto spaceToDepthQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*spaceToDepthQueueDescriptor, info, m_CLCompileContext); } case LayerType::Splitter : { auto splitterQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*splitterQueueDescriptor, info, m_CLCompileContext); } case LayerType::Stack : { auto stackQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*stackQueueDescriptor, info, m_CLCompileContext); } case LayerType::StridedSlice : { auto stridedSliceQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*stridedSliceQueueDescriptor, info, m_CLCompileContext); } case LayerType::Subtraction : { auto subtractionQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*subtractionQueueDescriptor, info, m_CLCompileContext); } case LayerType::Transpose : { auto transposeQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*transposeQueueDescriptor, info, m_CLCompileContext); } case LayerType::TransposeConvolution2d : { auto transposeConvolution2dQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*transposeConvolution2dQueueDescriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } case LayerType::UnidirectionalSequenceLstm : { auto desc = PolymorphicDowncast(&descriptor); return MakeWorkloadHelper(*desc, info, m_CLCompileContext); } default: return nullptr; } } std::unique_ptr ClWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateArgMinMax(const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateBatchNormalization( const BatchNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateBatchToSpaceNd(const BatchToSpaceNdQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateCast(const CastQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateChannelShuffle(const ChannelShuffleQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateComparison(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateConcat(const ConcatQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateConvertFp16ToFp32( const ConvertFp16ToFp32QueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( const ConvertFp32ToFp16QueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { bool isFastMathEnabled = false; if (m_ModelContextPtr) { if (m_ModelContextPtr.get() != nullptr) { auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); if (modelOptions) { isFastMathEnabled = modelOptions->IsFastMathEnabled(); } } } return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext, isFastMathEnabled); } std::unique_ptr ClWorkloadFactory::CreateConvolution3d(const Convolution3dQueueDescriptor& descriptor, const WorkloadInfo& info) const { bool isFastMathEnabled = false; if (m_ModelContextPtr) { if (m_ModelContextPtr.get() != nullptr) { auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); if (modelOptions) { isFastMathEnabled = modelOptions->IsFastMathEnabled(); } } } return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext, isFastMathEnabled); } std::unique_ptr ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateDepthToSpace(const DepthToSpaceQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateDepthwiseConvolution2d( const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateDequantize(const DequantizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateDetectionPostProcess( const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateDivision(const DivisionQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateElementwiseUnary(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info) const { switch(descriptor.m_Parameters.m_Operation) { case UnaryOperation::Abs: { AbsQueueDescriptor absQueueDescriptor; absQueueDescriptor.m_Inputs = descriptor.m_Inputs; absQueueDescriptor.m_Outputs = descriptor.m_Outputs; return std::make_unique(absQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::Exp: return std::make_unique(descriptor, info, m_CLCompileContext); case UnaryOperation::Log: return std::make_unique(descriptor, info, m_CLCompileContext); case UnaryOperation::LogicalNot: return std::make_unique(descriptor, info, m_CLCompileContext); case UnaryOperation::Neg: return std::make_unique(descriptor, info, m_CLCompileContext); case UnaryOperation::Rsqrt: { RsqrtQueueDescriptor rsqrtQueueDescriptor; rsqrtQueueDescriptor.m_Inputs = descriptor.m_Inputs; rsqrtQueueDescriptor.m_Outputs = descriptor.m_Outputs; return std::make_unique(rsqrtQueueDescriptor, info, m_CLCompileContext); } case UnaryOperation::Sin: return std::make_unique(descriptor, info, m_CLCompileContext); default: return nullptr; } } std::unique_ptr ClWorkloadFactory::CreateFill(const FillQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateFloor(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateFullyConnected(const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateGather(const GatherQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreateInstanceNormalization( const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateLogicalBinary(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info) const { switch(descriptor.m_Parameters.m_Operation) { case LogicalBinaryOperation::LogicalAnd: return std::make_unique(descriptor, info, m_CLCompileContext); case LogicalBinaryOperation::LogicalOr: return std::make_unique(descriptor, info, m_CLCompileContext); default: return nullptr; } } std::unique_ptr ClWorkloadFactory::CreateLogSoftmax(const LogSoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateMaximum(const MaximumQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateMean(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateMemCopy(const MemCopyQueueDescriptor& descriptor, const WorkloadInfo& info) const { if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) { throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemCopy workload"); } return MakeWorkload(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreateMemImport(const MemImportQueueDescriptor& descriptor, const WorkloadInfo& info) const { if (descriptor.m_Inputs.empty() || !descriptor.m_Inputs[0]) { throw InvalidArgumentException("ClWorkloadFactory: Invalid null input for MemImport workload"); } return std::make_unique(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreateMinimum(const MinimumQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateMultiplication(const MultiplicationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateNormalization(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreatePad(const PadQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreatePreCompiled(const PreCompiledQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreatePrelu(const PreluQueueDescriptor &descriptor, const WorkloadInfo &info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateQLstm(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateQuantize(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateQuantizedLstm(const QuantizedLstmQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateRank(const RankQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreateReduce(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info); } std::unique_ptr ClWorkloadFactory::CreateReshape(const ReshapeQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateResize(const ResizeQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSlice(const SliceQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSoftmax(const SoftmaxQueueDescriptor& descriptor, const WorkloadInfo& info) const { return std::make_unique(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSpaceToBatchNd(const SpaceToBatchNdQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSpaceToDepth(const SpaceToDepthQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateStack(const StackQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateStridedSlice(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateSubtraction(const SubtractionQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateTranspose(const TransposeQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_CLCompileContext); } std::unique_ptr ClWorkloadFactory::CreateTransposeConvolution2d( const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager(), m_CLCompileContext); } } // namespace armnn