//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

// Implementation of the ClBackend member functions: identity, memory manager / workload factory /
// tensor handle factory creation, backend and model contexts, layer support, and the
// backend-specific subgraph optimisation pass.
//
// NOTE(review): in this copy of the file several include directives have no header name and a
// number of template-looking calls (std::make_unique, std::make_shared, static_cast,
// dynamic_cast, PolymorphicDowncast, std::vector, std::map, Optional, ...) have no template or
// type argument. This looks like text lost in extraction (anything between angle brackets);
// confirm against the repository copy before building. The code tokens below are left exactly as
// found.

#include "ClBackend.hpp"
#include "ClBackendContext.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
#include "ClWorkloadFactory.hpp"

#include
#include

#include
#include
#include

#include
#include
#include

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include

#include
#include

namespace armnn
{

// Returns the identifier of this backend.
// The BackendId is built once from ClBackendId() and kept in a function-local static, so every
// call returns a reference to the same object.
const BackendId& ClBackend::GetIdStatic()
{
    static const BackendId s_Id{ClBackendId()};
    return s_Id;
}

// Creates a new memory manager and hands ownership to the caller.
// The manager is constructed around a freshly created inner object (presumably an allocator —
// the concrete types are not visible in this copy).
IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
    return std::make_unique(std::make_unique());
}

// Creates a workload factory that uses the supplied memory manager.
//   memoryManager: shared memory manager; it is downcast before being handed to the factory.
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique(
        PolymorphicPointerDowncast(memoryManager));
}

// Creates a workload factory that uses the supplied memory manager and a backend-specific model
// context built from modelOptions.
//   memoryManager: shared memory manager; it is downcast before being handed to the factory.
//   modelOptions:  options forwarded to CreateBackendSpecificModelContext().
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique(
        PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

// Creates a workload factory backed by a newly created memory manager, and registers that manager
// plus two tensor handle factories with the registry.
//   registry: receives the memory manager, a factory built from the memory manager, and a second
//             factory built with MemorySource::Malloc for both of its flag arguments.
// NOTE(review): the registration sequence here is repeated in the other registry-based overloads
// and in RegisterTensorHandleFactories(); keep them in sync when changing one.
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
    auto memoryManager = std::make_shared(std::make_unique());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique(memoryManager));
    registry.RegisterFactory(std::make_unique(
        static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc)));

    return std::make_unique(
        PolymorphicPointerDowncast(memoryManager));
}

// Same as the registry-only overload, but the returned workload factory additionally receives a
// backend-specific model context built from modelOptions.
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared(std::make_unique());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique(memoryManager));
    registry.RegisterFactory(std::make_unique(
        static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc)));

    return std::make_unique(
        PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

// Same as the (registry, modelOptions) overload, except that the second registered factory is
// built from the caller-supplied inputFlags / outputFlags instead of MemorySource::Malloc.
IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry,
    const ModelOptions& modelOptions,
    MemorySourceFlags inputFlags,
    MemorySourceFlags outputFlags) const
{
    auto memoryManager = std::make_shared(std::make_unique());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique(memoryManager));
    registry.RegisterFactory(std::make_unique(inputFlags, outputFlags));

    return std::make_unique(
        PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

// Returns the tensor handle factory ids for this backend: ClTensorHandleFactory first, then
// ClImportTensorHandleFactory (presumably in order of preference, going by the function name).
std::vector ClBackend::GetHandleFactoryPreferences() const
{
    return std::vector {ClTensorHandleFactory::GetIdStatic(),
                        ClImportTensorHandleFactory::GetIdStatic()};
}

// Creates a new memory manager and registers it, together with two tensor handle factories, with
// the registry. The second factory is built with MemorySource::Malloc for both flag arguments.
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    auto mgr = std::make_shared(std::make_unique());

    registry.RegisterMemoryManager(mgr);
    registry.RegisterFactory(std::make_unique(mgr));
    registry.RegisterFactory(std::make_unique(
        static_cast(MemorySource::Malloc), static_cast(MemorySource::Malloc)));
}

// As above, but the second factory is built from the caller-supplied inputFlags / outputFlags.
void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                              MemorySourceFlags inputFlags,
                                              MemorySourceFlags outputFlags)
{
    auto mgr = std::make_shared(std::make_unique());

    registry.RegisterMemoryManager(mgr);
    registry.RegisterFactory(std::make_unique(mgr));
    registry.RegisterFactory(std::make_unique(inputFlags, outputFlags));
}

// Creates a ClBackendContext from the runtime creation options and returns it wrapped in an
// IBackendContextPtr.
IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new ClBackendContext{options}};
}

// Returns an empty profiling context pointer; both arguments are ignored.
IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

// Returns a default-constructed (empty) Optimizations object.
IBackendInternal::Optimizations ClBackend::GetOptimizations() const
{
    return Optimizations{};
}

// Creates a ClBackendModelContext from modelOptions and returns it wrapped in an
// IBackendSpecificModelContextPtr.
IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
}

// Returns the layer-support object for this backend, built with an empty model context.
// The object is created on the first call and shared by every later call (function-local static).
IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

// Returns the layer-support object built with a model context created from modelOptions.
// NOTE(review): layerSupport is a function-local static, so it is initialised only on the first
// call. Later calls return that same object even when they pass different modelOptions — confirm
// this caching is intended.
IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

// Backend-specific optimisation pass over a subgraph.
//
// Two rewrites are attempted for each layer, walking the subgraph from its last layer to its
// first:
//   1. A Convolution2d / DepthwiseConvolution2d / FullyConnected / BatchNormalization / Addition /
//      Division / Multiplication / Subtraction layer whose output slot has exactly one connection,
//      to an Activation layer, is fused with that activation when the matching Cl*Validate call
//      returns a status that evaluates to true.
//   2. A Reduce layer with more than one axis is replaced by a chain of Reduce layers.
//
//   subgraph:     the layers to inspect.
//   modelOptions: used (only when ARMCOMPUTECL_ENABLED is defined) to look up the fast-math flag
//                 that is passed to the Convolution2d validation.
//   returns:      an OptimizationViews holding the substitutions made; if none were made the whole
//                 subgraph is added as untouched, otherwise the layers still in `untouched` are
//                 reported through ReportUntouchedLayers().
OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                  const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews;

    auto it = subgraph.end();
    bool isFastMathEnabled = false;
    // Guid -> layer for every layer in the subgraph; entries are erased as layers get substituted.
    std::map untouched;

    // First pass: record every layer as untouched.
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;
        untouched.insert({base.GetGuid(), &base});
    }

    // Rewind the iterator for the second pass.
    it = subgraph.end();
#if defined(ARMCOMPUTECL_ENABLED)
    // isFastMathEnabled stays false unless the model context exists and the cast below succeeds.
    IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);

    if (modelContextPtr)
    {
        auto clModelOptions = dynamic_cast(modelContextPtr.get());
        if (clModelOptions)
        {
            isFastMathEnabled = clModelOptions->IsFastMathEnabled();
        }
    }
#endif

    // Second pass: try the substitutions, again walking from the last layer to the first.
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;

        // Fuse activation into previous layer if supported by backend.
        // Only layers of a fusable type whose GetAdditionalInformation() is null are considered.
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                // The activation must be the only consumer of this output slot.
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast(&child);

                            // Name of the replacement layer: "fused-<activation>-into-<base>".
                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            // Each branch below validates the base layer together with the
                            // activation descriptor, using the activation's input tensor info as
                            // the output info, and fuses only if the status evaluates to true.
                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast(&base);

                                Optional biases;

                                // Bias info is only read when the descriptor says bias is enabled.
                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = ClConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        isFastMathEnabled,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases(optimizationViews,
                                                                  baseLayer,
                                                                  activationLayer,
                                                                  activationDesc,
                                                                  name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast(&base);

                                Optional biases;

                                // Bias info is only read when the descriptor says bias is enabled.
                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases(optimizationViews,
                                                                  baseLayer,
                                                                  activationLayer,
                                                                  activationDesc,
                                                                  name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast(&base);

                                // NOTE(review): unlike the two convolution branches, m_Bias is
                                // dereferenced here without checking m_BiasEnabled first. Confirm
                                // m_Bias is always set for FullyConnected layers, otherwise this
                                // dereferences a null handle.
                                arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        baseLayer->m_Bias->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases(optimizationViews,
                                                                  baseLayer,
                                                                  activationLayer,
                                                                  activationDesc,
                                                                  name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast(&base);

                                arm_compute::Status status = ClBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseLayerWithParameters(optimizationViews,
                                                                    baseLayer,
                                                                    activationLayer,
                                                                    activationDesc,
                                                                    name);

                                    // Transfer the constant tensors to the replacement layer;
                                    // baseLayer's members are moved-from afterwards.
                                    replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast(&base);

                                arm_compute::Status status = ClAdditionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters(optimizationViews,
                                                               baseLayer,
                                                               activationLayer,
                                                               activationDesc,
                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast(&base);

                                arm_compute::Status status = ClDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters(optimizationViews,
                                                               baseLayer,
                                                               activationLayer,
                                                               activationDesc,
                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast(&base);

                                arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters(optimizationViews,
                                                               baseLayer,
                                                               activationLayer,
                                                               activationDesc,
                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast(&base);

                                arm_compute::Status status = ClSubtractionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters(optimizationViews,
                                                               baseLayer,
                                                               activationLayer,
                                                               activationDesc,
                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            // NOTE(review): the !empty() test is implied by size() > 1.
            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector layers = ChainReduceLayers(optimizationViews,
                                                       baseLayer,
                                                       reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }
    }

    // Nothing substituted: report the whole subgraph as untouched. Otherwise report only the
    // layers that were never erased from the map.
    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn