39 #include <arm_compute/core/Types.h>
40 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
57 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
63 return std::make_unique<ClWorkloadFactory>(
64 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
70 return std::make_unique<ClWorkloadFactory>(
77 std::shared_ptr<ClMemoryManager> memoryManager;
84 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
87 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
88 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
98 return std::make_unique<ClWorkloadFactory>(
99 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
105 std::shared_ptr<ClMemoryManager> memoryManager;
112 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
115 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
116 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
126 return std::make_unique<ClWorkloadFactory>(
145 std::shared_ptr<ClMemoryManager> memoryManager;
152 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
155 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
156 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
157 inputFlags, outputFlags);
166 return std::make_unique<ClWorkloadFactory>(
178 std::shared_ptr<ClMemoryManager> memoryManager;
185 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
188 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
189 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
214 std::shared_ptr<ClMemoryManager> memoryManager;
221 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
224 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
225 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
226 inputFlags, outputFlags);
273 return std::make_unique<ClBackendDefaultAllocator>();
282 bool isFastMathEnabled =
false;
283 std::map<LayerGuid, Layer*> untouched;
288 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
289 untouched.insert({base.
GetGuid(), &base});
293 #if defined(ARMCOMPUTECL_ENABLED)
308 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
319 if (output->GetNumConnections() == 1)
321 for (
auto&& childInput : output->GetConnections())
324 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
326 Layer& child = childInput->GetOwningLayer();
328 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
330 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
349 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
358 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
363 untouched.erase(baseLayer->
GetGuid());
364 untouched.erase(activationLayer->GetGuid());
370 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
381 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
389 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
394 untouched.erase(baseLayer->
GetGuid());
395 untouched.erase(activationLayer->GetGuid());
412 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
420 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
425 untouched.erase(baseLayer->
GetGuid());
426 untouched.erase(activationLayer->GetGuid());
432 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
436 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
437 baseLayer->
m_Mean->GetTensorInfo(),
439 baseLayer->
m_Beta->GetTensorInfo(),
440 baseLayer->
m_Gamma->GetTensorInfo(),
447 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
453 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
455 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
457 untouched.erase(baseLayer->
GetGuid());
458 untouched.erase(activationLayer->GetGuid());
463 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
468 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
473 FuseAdditionLayer<AdditionLayer>(optimizationViews,
478 untouched.erase(baseLayer->
GetGuid());
479 untouched.erase(activationLayer->GetGuid());
484 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
489 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
494 FuseDivisionLayer<DivisionLayer>(optimizationViews,
499 untouched.erase(baseLayer->
GetGuid());
500 untouched.erase(activationLayer->GetGuid());
510 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
515 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
520 untouched.erase(baseLayer->
GetGuid());
521 untouched.erase(activationLayer->GetGuid());
526 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
531 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
536 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
541 untouched.erase(baseLayer->
GetGuid());
542 untouched.erase(activationLayer->GetGuid());
554 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
557 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
560 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
565 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
566 untouched.erase(baseLayer->
GetGuid());
574 Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
579 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
587 FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
588 poolingDescriptor, padLayer);
589 untouched.erase(baseLayer->
GetGuid());
590 untouched.erase(padLayer->
GetGuid());
605 return optimizationViews;