39 #include <arm_compute/core/Types.h> 40 #include <arm_compute/runtime/CL/CLBufferAllocator.h> 57 return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
63 return std::make_unique<ClWorkloadFactory>(
64 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
70 return std::make_unique<ClWorkloadFactory>(
77 std::shared_ptr<ClMemoryManager> memoryManager;
84 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
87 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
88 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
98 return std::make_unique<ClWorkloadFactory>(
99 PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
105 std::shared_ptr<ClMemoryManager> memoryManager;
112 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
115 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
116 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
126 return std::make_unique<ClWorkloadFactory>(
145 std::shared_ptr<ClMemoryManager> memoryManager;
152 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
155 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
156 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
157 inputFlags, outputFlags);
166 return std::make_unique<ClWorkloadFactory>(
178 std::shared_ptr<ClMemoryManager> memoryManager;
185 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
188 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
189 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
214 std::shared_ptr<ClMemoryManager> memoryManager;
221 memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
224 std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
225 std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
226 inputFlags, outputFlags);
273 return std::make_unique<ClBackendDefaultAllocator>();
282 bool isFastMathEnabled =
false;
283 std::map<LayerGuid, Layer*> untouched;
288 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
289 untouched.insert({base.
GetGuid(), &base});
293 #if defined(ARMCOMPUTECL_ENABLED) 308 Layer& base = *(PolymorphicDowncast<Layer*>(*it));
319 if (output->GetNumConnections() == 1)
321 for (
auto&& childInput : output->GetConnections())
324 (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
326 Layer& child = childInput->GetOwningLayer();
328 auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
330 const std::string name = std::string(
"fused-") + child.
GetName() + std::string(
"-into-") +
349 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
358 FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
363 untouched.erase(baseLayer->GetGuid());
364 untouched.erase(activationLayer->GetGuid());
370 PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
381 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
389 FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
394 untouched.erase(baseLayer->GetGuid());
395 untouched.erase(activationLayer->GetGuid());
405 if (descriptor.m_BiasEnabled)
412 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
420 FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
425 untouched.erase(baseLayer->GetGuid());
426 untouched.erase(activationLayer->GetGuid());
432 PolymorphicDowncast<BatchNormalizationLayer*>(&base);
436 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
437 baseLayer->
m_Mean->GetTensorInfo(),
439 baseLayer->
m_Beta->GetTensorInfo(),
440 baseLayer->
m_Gamma->GetTensorInfo(),
447 FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
453 replacementLayer->
m_Beta = std::move(baseLayer->m_Beta);
454 replacementLayer->
m_Gamma = std::move(baseLayer->m_Gamma);
455 replacementLayer->
m_Mean = std::move(baseLayer->m_Mean);
456 replacementLayer->
m_Variance = std::move(baseLayer->m_Variance);
457 untouched.erase(baseLayer->GetGuid());
458 untouched.erase(activationLayer->GetGuid());
463 AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
468 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
473 FuseAdditionLayer<AdditionLayer>(optimizationViews,
478 untouched.erase(baseLayer->GetGuid());
479 untouched.erase(activationLayer->GetGuid());
484 DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
489 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
494 FuseDivisionLayer<DivisionLayer>(optimizationViews,
499 untouched.erase(baseLayer->GetGuid());
500 untouched.erase(activationLayer->GetGuid());
510 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
515 FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
520 untouched.erase(baseLayer->GetGuid());
521 untouched.erase(activationLayer->GetGuid());
526 SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
531 activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
536 FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
541 untouched.erase(baseLayer->GetGuid());
542 untouched.erase(activationLayer->GetGuid());
554 ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
557 if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
560 std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
565 ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
566 untouched.erase(baseLayer->GetGuid());
574 Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
579 PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
587 FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
588 poolingDescriptor, padLayer);
589 untouched.erase(baseLayer->GetGuid());
590 untouched.erase(padLayer->
GetGuid());
605 return optimizationViews;
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
IConnectableLayerIterator endIConnectable()
static const FactoryId & GetIdStatic()
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
This layer represents a depthwise convolution 2d operation.
constexpr const char * ClBackendId()
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
Layer & GetOwningLayer() const
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
IConnectableLayerIterator beginIConnectable()
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
std::shared_ptr< ClBackendCustomAllocatorWrapper > m_CustomAllocator
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
This layer represents a pad operation.
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
This layer represents a reduction operation.
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories. Either this method or CreateMemoryManager() and IWorkloadFa...
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
unsigned int GetNumConnections() const override
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Id for Memory Copy and TensorHandleFactory Id for Memory Impor...
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
bool TryFoldPadIntoLayer2d(const PadDescriptor &padDescriptor, Descriptor &layerDescriptor, const TensorInfo &tensorInfo)
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
This layer represents a fully connected operation.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
A ReduceDescriptor for the REDUCE operators.
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
A FullyConnectedDescriptor for the FullyConnectedLayer.
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
bool IsFastMathEnabled() const
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
static const FactoryId & GetIdStatic()
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
An ActivationDescriptor for the ActivationLayer.
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace...
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
This layer represents a pooling 2d operation.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
This layer represents a subtraction operation.
std::vector< OutputSlot >::iterator BeginOutputSlots()
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
bool m_UsingCustomAllocator
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
const OutputSlot & GetOutputSlot(unsigned int index=0) const override
Get the const output slot handle by slot index.
const char * GetName() const override
Returns the name of the layer.
This layer represents a convolution 2d operation.
A Pooling2dDescriptor for the Pooling2dLayer.
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
This layer represents a multiplication operation.
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
const TensorInfo & GetTensorInfo() const override
static const BackendId & GetIdStatic()
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< T > GetAdditionalInformation() const
LayerGuid GetGuid() const final
Returns the unique id of the layer.
std::unique_ptr< IBackendContext > IBackendContextPtr