ArmNN 23.11
ClBackend.cpp
//
// Copyright © 2017-2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "ClBackend.hpp"
#include "ClBackendContext.hpp"
#include "ClBackendDefaultAllocator.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
#include "ClWorkloadFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
namespace armnn
{

const BackendId& ClBackend::GetIdStatic()
{
    static const BackendId s_Id{ClBackendId()};
    return s_Id;
}

IBackendInternal::IMemoryManagerUniquePtr ClBackend::CreateMemoryManager() const
{
    if (m_UsingCustomAllocator)
    {
        return std::make_unique<ClMemoryManager>(m_CustomAllocator);
    }
    return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& registry,
    const ModelOptions& modelOptions,
    MemorySourceFlags inputFlags,
    MemorySourceFlags outputFlags) const
{
    // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));

    return std::make_unique<ClWorkloadFactory>(
        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
                                                         ClImportTensorHandleFactory::GetIdStatic()};
}

void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry)
{
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc));

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

void ClBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
                                              MemorySourceFlags inputFlags,
                                              MemorySourceFlags outputFlags)
{
    // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc
    if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined))
    {
        outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc);
    }
    std::shared_ptr<ClMemoryManager> memoryManager;
    if (m_UsingCustomAllocator)
    {
        memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
    }
    else
    {
        memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
    }

    std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<ClTensorHandleFactory>(memoryManager);
    std::unique_ptr<ITensorHandleFactory> importFactory = std::make_unique<ClImportTensorHandleFactory>(
        inputFlags, outputFlags);

    registry.RegisterCopyAndImportFactoryPair(factory->GetId(), importFactory->GetId());
    registry.RegisterCopyAndImportFactoryPair(importFactory->GetId(), factory->GetId());

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::move(factory));
    registry.RegisterFactory(std::move(importFactory));
}

IBackendInternal::IBackendContextPtr ClBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const
{
    return IBackendContextPtr{new ClBackendContext{options}};
}

IBackendInternal::IBackendProfilingContextPtr ClBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::IBackendSpecificModelContextPtr ClBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr ClBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new ClLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

std::unique_ptr<ICustomAllocator> ClBackend::GetDefaultAllocator() const
{
    return std::make_unique<ClBackendDefaultAllocator>();
}

BackendCapabilities ClBackend::GetCapabilities() const
{
    // add new capabilities here..
    return BackendCapabilities ("GpuAcc",
                                {
                                    {"NonConstWeights", true},
                                    {"AsyncExecution", false},
                                    {"ProtectedContentAllocation", true},
                                    {"ConstantTensorsAsInputs", true},
                                    {"PreImportIOTensors", false},
                                    {"ExternallyManagedMemory", true},
                                    {"MultiAxisPacking", false},
                                    {"SingleAxisPacking", true},
                                    {"HasFp16", arm_compute::CLKernelLibrary::get().fp16_supported()}
                                });
}

OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                                                  const ModelOptions& modelOptions) const
{
    OptimizationViews optimizationViews(modelOptions);

    auto it = subgraph.end();
    bool isFastMathEnabled = false;
    std::map<LayerGuid, Layer*> untouched;

    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

    it = subgraph.end();
#if defined(ARMCOMPUTECL_ENABLED)
    IBackendInternal::IBackendSpecificModelContextPtr modelContextPtr = CreateBackendSpecificModelContext(modelOptions);

    if (modelContextPtr)
    {
        auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
        if (clModelOptions)
        {
            isFastMathEnabled = clModelOptions->IsFastMathEnabled();
        }
    }
#endif
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
            || base.GetType() == LayerType::ElementwiseBinary)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClConvolution2dWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    biases,
                                    isFastMathEnabled,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                    PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClDepthwiseConvolutionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    biases,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                FullyConnectedDescriptor descriptor = baseLayer->GetParameters();

                                // As bias is optional only try to get TensorInfo from input if bias is enabled.
                                Optional<TensorInfo> biases;
                                if (descriptor.m_BiasEnabled)
                                {
                                    biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
                                }

                                arm_compute::Status status = ClFullyConnectedWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    biases,
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                    PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = ClBatchNormalizationValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Mean->GetTensorInfo(),
                                    baseLayer->m_Variance->GetTensorInfo(),
                                    baseLayer->m_Beta->GetTensorInfo(),
                                    baseLayer->m_Gamma->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                        FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                             baseLayer,
                                                                                             activationLayer,
                                                                                             activationDesc,
                                                                                             name);

                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = ClAdditionValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);

                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = ClDivisionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = ClSubtractionValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::ElementwiseBinary)
                            {
                                ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);

                                if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
                                {
                                    arm_compute::Status status = ClAdditionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Add,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
                                {
                                    arm_compute::Status status = ClDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Div,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
                                {
                                    arm_compute::Status status = ClMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Mul,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
                                {
                                    arm_compute::Status status = ClSubtractionValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                    if (status)
                                    {
                                        FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
                                                                                           baseLayer,
                                                                                           activationLayer,
                                                                                           activationDesc,
                                                                                           BinaryOperation::Sub,
                                                                                           name);
                                        untouched.erase(baseLayer->GetGuid());
                                        untouched.erase(activationLayer->GetGuid());
                                    }
                                }
                                // No fusion available for other BinaryOperations
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }

        // Special case to fuse padding into average pooling 2d for quantized datatype.
        // Required to be done as a backend specific optimization as Neon does not support this special case.
        if (base.GetType() == LayerType::Pooling2d)
        {
            Pooling2dLayer* baseLayer = PolymorphicDowncast<Pooling2dLayer*>(&base);
            Pooling2dDescriptor poolingDescriptor = baseLayer->GetParameters();

            if (baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Pad)
            {
                PadLayer* padLayer = PolymorphicDowncast<PadLayer*>(
                    &baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer());
                if (padLayer->GetOutputSlot(0).GetNumConnections() == 1 &&
                    optimizations::pad_fold::TryFoldPadIntoLayer2d(padLayer->GetParameters(),
                                                                   poolingDescriptor,
                                                                   padLayer->GetOutputSlot().GetTensorInfo(),
                                                                   true))
                {
                    FoldPadIntoAveragePool2d<Pooling2dLayer>(optimizationViews, baseLayer,
                                                             poolingDescriptor, padLayer);
                    untouched.erase(baseLayer->GetGuid());
                    untouched.erase(padLayer->GetGuid());
                }
            }
        }
    }

    if (optimizationViews.GetSubstitutions().empty() && optimizationViews.GetDeletedSubgraphs().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

} // namespace armnn
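
For context, the sketch below (not part of ClBackend.cpp) shows one way a caller might exercise the entry points defined above. It assumes the CL backend has been registered with the BackendRegistry under the "GpuAcc" id returned by ClBackend::GetIdStatic(), and the function name InspectGpuAccBackend is purely illustrative; only CreateMemoryManager, CreateWorkloadFactory and GetCapabilities from this file are used.

#include <armnn/BackendRegistry.hpp>

void InspectGpuAccBackend()
{
    using namespace armnn;

    // Construct the backend through the registry, the same way the runtime does.
    // "GpuAcc" is the id the CL backend registers itself under (see ClBackend::GetIdStatic()).
    auto backend = BackendRegistryInstance().GetFactory(BackendId("GpuAcc"))();

    // Capabilities advertised by ClBackend::GetCapabilities(), e.g. "NonConstWeights" or "HasFp16".
    BackendCapabilities capabilities = backend->GetCapabilities();

    // A workload factory backed by the backend's own memory manager
    // (ClBackend::CreateMemoryManager and ClBackend::CreateWorkloadFactory above).
    auto memoryManager = backend->CreateMemoryManager();
    auto workloadFactory = backend->CreateWorkloadFactory(std::move(memoryManager));
}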