39 #include <arm_compute/core/Types.h>
40 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
57 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
63 return std::make_unique<ClWorkloadFactory>(
64 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
70 return std::make_unique<ClWorkloadFactory>(
77 std::shared_ptr<ClMemoryManager> memoryManager;
84 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
87 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
88 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
98 return std::make_unique<ClWorkloadFactory>(
99 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
105 std::shared_ptr<ClMemoryManager> memoryManager;
112 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
115 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
116 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
126 return std::make_unique<ClWorkloadFactory>(
145 std::shared_ptr<ClMemoryManager> memoryManager;
152 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
155 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
156 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
157 inputFlags, outputFlags);
166 return std::make_unique<ClWorkloadFactory>(
178 std::shared_ptr<ClMemoryManager> memoryManager;
185 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
188 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
189 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
214 std::shared_ptr<ClMemoryManager> memoryManager;
221 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
224 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
225 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
226 inputFlags, outputFlags);
273 return std::make_unique<ClBackendDefaultAllocator>();
282 bool isFastMathEnabled =
false;
283 std::map<LayerGuid, Layer*> untouched;
288 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
289 untouched.insert({base.
GetGuid(), &base});
293 #if defined(ARMCOMPUTECL_ENABLED)
308 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
320 if (output->GetNumConnections() == 1)
322 for (
auto&& childInput : output->GetConnections())
325 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
327 Layer& child = childInput->GetOwningLayer();
329 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
331 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
350 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
359 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
364 untouched.erase(baseLayer->
GetGuid());
365 untouched.erase(activationLayer->GetGuid());
371 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
382 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
390 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
395 untouched.erase(baseLayer->
GetGuid());
396 untouched.erase(activationLayer->GetGuid());
413 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
421 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
426 untouched.erase(baseLayer->
GetGuid());
427 untouched.erase(activationLayer->GetGuid());
433 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
437 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
438 baseLayer->
m_Mean->GetTensorInfo(),
440 baseLayer->
m_Beta->GetTensorInfo(),
441 baseLayer->
m_Gamma->GetTensorInfo(),
448 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
454 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
456 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
458 untouched.erase(baseLayer->
GetGuid());
459 untouched.erase(activationLayer->GetGuid());
464 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
469 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
474 FuseAdditionLayer<AdditionLayer>(optimizationViews,
479 untouched.erase(baseLayer->
GetGuid());
480 untouched.erase(activationLayer->GetGuid());
485 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
490 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
495 FuseDivisionLayer<DivisionLayer>(optimizationViews,
500 untouched.erase(baseLayer->
GetGuid());
501 untouched.erase(activationLayer->GetGuid());
511 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
516 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
521 untouched.erase(baseLayer->
GetGuid());
522 untouched.erase(activationLayer->GetGuid());
527 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
532 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
537 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
542 untouched.erase(baseLayer->
GetGuid());
543 untouched.erase(activationLayer->GetGuid());
555 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
560 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
566 untouched.erase(baseLayer->
GetGuid());
567 untouched.erase(activationLayer->GetGuid());
575 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
580 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
586 untouched.erase(baseLayer->
GetGuid());
587 untouched.erase(activationLayer->GetGuid());
595 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
600 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
606 untouched.erase(baseLayer->
GetGuid());
607 untouched.erase(activationLayer->GetGuid());
615 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
620 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
639 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
642 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
645 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
650 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
651 untouched.erase(baseLayer->
GetGuid());
659 Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
664 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
672 FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
673 poolingDescriptor, padLayer);
674 untouched.erase(baseLayer->
GetGuid());
675 untouched.erase(padLayer->
GetGuid());
690 return optimizationViews;