38 #include <arm_compute/core/Types.h>
39 #include <arm_compute/runtime/Allocator.h>
52 return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
59 return std::make_unique<NeonWorkloadFactory>(
60 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
66 return std::make_unique<NeonWorkloadFactory>(
73 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
78 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
85 return std::make_unique<NeonWorkloadFactory>(
86 PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
92 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
97 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
103 return std::make_unique<NeonWorkloadFactory>(
148 std::map<LayerGuid, Layer*> untouched;
153 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
154 untouched.insert({base.
GetGuid(), &base});
161 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
172 if (output->GetNumConnections() == 1)
174 for (
auto&& childInput : output->GetConnections())
177 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
179 Layer& child = childInput->GetOwningLayer();
181 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
183 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
202 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
211 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
216 untouched.erase(baseLayer->
GetGuid());
217 untouched.erase(activationLayer->GetGuid());
223 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
234 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
242 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
247 untouched.erase(baseLayer->
GetGuid());
248 untouched.erase(activationLayer->GetGuid());
265 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
273 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
278 untouched.erase(baseLayer->
GetGuid());
279 untouched.erase(activationLayer->GetGuid());
285 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
289 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
290 baseLayer->
m_Mean->GetTensorInfo(),
292 baseLayer->
m_Beta->GetTensorInfo(),
293 baseLayer->
m_Gamma->GetTensorInfo(),
300 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
306 replacementLayer->
m_Beta = std::move(baseLayer->
m_Beta);
308 replacementLayer->
m_Mean = std::move(baseLayer->
m_Mean);
310 untouched.erase(baseLayer->
GetGuid());
311 untouched.erase(activationLayer->GetGuid());
316 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
321 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
326 FuseAdditionLayer<AdditionLayer>(optimizationViews,
331 untouched.erase(baseLayer->
GetGuid());
332 untouched.erase(activationLayer->GetGuid());
337 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
342 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
347 FuseDivisionLayer<DivisionLayer>(optimizationViews,
352 untouched.erase(baseLayer->
GetGuid());
353 untouched.erase(activationLayer->GetGuid());
363 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
368 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
373 untouched.erase(baseLayer->
GetGuid());
374 untouched.erase(activationLayer->GetGuid());
379 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
384 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
389 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
394 untouched.erase(baseLayer->
GetGuid());
395 untouched.erase(activationLayer->GetGuid());
407 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
412 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
418 untouched.erase(baseLayer->
GetGuid());
419 untouched.erase(activationLayer->GetGuid());
427 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
432 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
438 untouched.erase(baseLayer->
GetGuid());
439 untouched.erase(activationLayer->GetGuid());
447 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
452 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
458 untouched.erase(baseLayer->
GetGuid());
459 untouched.erase(activationLayer->GetGuid());
467 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
472 FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
478 untouched.erase(baseLayer->
GetGuid());
479 untouched.erase(activationLayer->GetGuid());
493 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
496 if (!reduceDescriptor.
m_vAxis.empty() && reduceDescriptor.
m_vAxis.size() > 1)
499 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
504 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
505 untouched.erase(baseLayer->
GetGuid());
519 return optimizationViews;
529 auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
534 auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
543 return std::make_unique<DefaultAllocator>();