ArmNN
 21.08
ClBackend.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
#include "ClBackend.hpp"
#include "ClBackendContext.hpp"
#include "ClBackendId.hpp"
#include "ClBackendModelContext.hpp"
#include "ClImportTensorHandleFactory.hpp"
#include "ClLayerSupport.hpp"
#include "ClTensorHandleFactory.hpp"
#include "ClWorkloadFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/ClAdditionWorkload.hpp"
#include "workloads/ClBatchNormalizationFloatWorkload.hpp"
#include "workloads/ClConvolution2dWorkload.hpp"
#include "workloads/ClDepthwiseConvolutionWorkload.hpp"
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/CL/CLBufferAllocator.h>
40 
41 namespace armnn
42 {
43 
45 {
46  static const BackendId s_Id{ClBackendId()};
47  return s_Id;
48 }
49 
51 {
53  {
54  return std::make_unique<ClMemoryManager>(m_CustomAllocator);
55  }
56  return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
57 }
58 
60  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
61 {
62  return std::make_unique<ClWorkloadFactory>(
63  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
64 }
65 
67  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
68 {
69  return std::make_unique<ClWorkloadFactory>(
70  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
71 }
72 
74  TensorHandleFactoryRegistry& registry) const
75 {
76  std::shared_ptr<ClMemoryManager> memoryManager;
78  {
79  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
80  }
81  else
82  {
83  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
84  }
85 
86  registry.RegisterMemoryManager(memoryManager);
87  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
88  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
89  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
90 
91  return std::make_unique<ClWorkloadFactory>(
92  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
93 }
94 
96  TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
97 {
98  std::shared_ptr<ClMemoryManager> memoryManager;
100  {
101  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
102  }
103  else
104  {
105  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
106  }
107 
108  registry.RegisterMemoryManager(memoryManager);
109  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
110  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
111  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
112 
113  return std::make_unique<ClWorkloadFactory>(
114  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
115 }
116 
118  TensorHandleFactoryRegistry& registry,
119  const ModelOptions& modelOptions,
120  MemorySourceFlags inputFlags,
121  MemorySourceFlags outputFlags) const
122 {
123  std::shared_ptr<ClMemoryManager> memoryManager;
125  {
126  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
127  }
128  else
129  {
130  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
131  }
132 
133  registry.RegisterMemoryManager(memoryManager);
134  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
135  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
136 
137  return std::make_unique<ClWorkloadFactory>(
138  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
139 }
140 
141 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
142 {
143  return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
145 }
146 
148 {
149  std::shared_ptr<ClMemoryManager> memoryManager;
151  {
152  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
153  }
154  else
155  {
156  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
157  }
158 
159  registry.RegisterMemoryManager(memoryManager);
160  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
161  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
162  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
163 }
164 
166  MemorySourceFlags inputFlags,
167  MemorySourceFlags outputFlags)
168 {
169  std::shared_ptr<ClMemoryManager> memoryManager;
171  {
172  memoryManager = std::make_shared<ClMemoryManager>(m_CustomAllocator);
173  }
174  else
175  {
176  memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
177  }
178 
179  registry.RegisterMemoryManager(memoryManager);
180  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
181  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
182 }
183 
185 {
186  return IBackendContextPtr{new ClBackendContext{options}};
187 }
188 
191 {
193 }
194 
196 {
197  return Optimizations{};
198 }
199 
201  const ModelOptions& modelOptions) const
202 {
203  return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
204 }
205 
207 {
208  static ILayerSupportSharedPtr layerSupport
209  {
211  };
212  return layerSupport;
213 }
214 
216 {
217  static ILayerSupportSharedPtr layerSupport
218  {
220  };
221  return layerSupport;
222 }
223 
225  const ModelOptions& modelOptions) const
226 {
227  OptimizationViews optimizationViews;
228 
229  auto it = subgraph.end();
230  bool isFastMathEnabled = false;
231  std::map<LayerGuid, Layer*> untouched;
232 
233  while (it != subgraph.begin())
234  {
235  --it;
236  Layer& base = **it;
237  untouched.insert({base.GetGuid(), &base});
238  }
239 
240  it = subgraph.end();
241 #if defined(ARMCOMPUTECL_ENABLED)
243 
244  if (modelContextPtr)
245  {
246  auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
247  if (clModelOptions)
248  {
249  isFastMathEnabled = clModelOptions->IsFastMathEnabled();
250  }
251  }
252 #endif
253  while (it != subgraph.begin())
254  {
255  --it;
256  Layer& base = **it;
257 
258  // Fuse activation into previous layer if supported by backend
262  || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
263  && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
264  {
265  for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
266  {
267  if (output->GetNumConnections() == 1)
268  {
269  for (auto&& childInput : output->GetConnections())
270  {
271  if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
272  (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
273  {
274  Layer& child = childInput->GetOwningLayer();
275 
276  auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
277 
278  const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
279  base.GetName();
280 
281  // Get params from activation layer
282  ActivationDescriptor activationDesc = activationLayer->GetParameters();
283 
284  if (base.GetType() == LayerType::Convolution2d)
285  {
286  Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
287 
288  Optional<TensorInfo> biases;
289 
290  if (baseLayer->GetParameters().m_BiasEnabled)
291  {
292  biases = baseLayer->m_Bias->GetTensorInfo();
293  }
294 
297  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
298  baseLayer->GetParameters(),
299  baseLayer->m_Weight->GetTensorInfo(),
300  biases,
301  isFastMathEnabled,
302  &activationDesc);
303 
304  if (status)
305  {
306  FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
307  baseLayer,
308  activationLayer,
309  activationDesc,
310  name);
311  untouched.erase(baseLayer->GetGuid());
312  untouched.erase(activationLayer->GetGuid());
313  }
314  }
315  else if (base.GetType() == LayerType::DepthwiseConvolution2d)
316  {
317  DepthwiseConvolution2dLayer* baseLayer =
318  PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
319 
320  Optional<TensorInfo> biases;
321 
322  if (baseLayer->GetParameters().m_BiasEnabled)
323  {
324  biases = baseLayer->m_Bias->GetTensorInfo();
325  }
326 
329  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
330  baseLayer->GetParameters(),
331  baseLayer->m_Weight->GetTensorInfo(),
332  biases,
333  &activationDesc);
334 
335  if (status)
336  {
337  FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
338  baseLayer,
339  activationLayer,
340  activationDesc,
341  name);
342  untouched.erase(baseLayer->GetGuid());
343  untouched.erase(activationLayer->GetGuid());
344  }
345  }
346  else if (base.GetType() == LayerType::FullyConnected)
347  {
348  FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
349 
352  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
353  baseLayer->m_Weight->GetTensorInfo(),
354  baseLayer->m_Bias->GetTensorInfo(),
355  baseLayer->GetParameters(),
356  &activationDesc);
357 
358  if (status)
359  {
360  FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
361  baseLayer,
362  activationLayer,
363  activationDesc,
364  name);
365  untouched.erase(baseLayer->GetGuid());
366  untouched.erase(activationLayer->GetGuid());
367  }
368  }
369  else if (base.GetType() == LayerType::BatchNormalization)
370  {
371  BatchNormalizationLayer* baseLayer =
372  PolymorphicDowncast<BatchNormalizationLayer*>(&base);
373 
376  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
377  baseLayer->m_Mean->GetTensorInfo(),
378  baseLayer->m_Variance->GetTensorInfo(),
379  baseLayer->m_Beta->GetTensorInfo(),
380  baseLayer->m_Gamma->GetTensorInfo(),
381  baseLayer->GetParameters(),
382  &activationDesc);
383 
384  if (status)
385  {
386  BatchNormalizationLayer* replacementLayer =
387  FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
388  baseLayer,
389  activationLayer,
390  activationDesc,
391  name);
392 
393  replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
394  replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
395  replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
396  replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
397  untouched.erase(baseLayer->GetGuid());
398  untouched.erase(activationLayer->GetGuid());
399  }
400  }
401  else if (base.GetType() == LayerType::Addition)
402  {
403  AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
404 
408  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
409  &activationDesc);
410 
411  if (status)
412  {
413  FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
414  baseLayer,
415  activationLayer,
416  activationDesc,
417  name);
418  untouched.erase(baseLayer->GetGuid());
419  untouched.erase(activationLayer->GetGuid());
420  }
421  }
422  else if (base.GetType() == LayerType::Division)
423  {
424  DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
425 
429  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
430  &activationDesc);
431 
432  if (status)
433  {
434  FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
435  baseLayer,
436  activationLayer,
437  activationDesc,
438  name);
439  untouched.erase(baseLayer->GetGuid());
440  untouched.erase(activationLayer->GetGuid());
441  }
442  }
443  else if (base.GetType() == LayerType::Multiplication)
444  {
445  MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
446 
450  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
451  &activationDesc);
452 
453  if (status)
454  {
455  FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
456  baseLayer,
457  activationLayer,
458  activationDesc,
459  name);
460  untouched.erase(baseLayer->GetGuid());
461  untouched.erase(activationLayer->GetGuid());
462  }
463  }
464  else if (base.GetType() == LayerType::Subtraction)
465  {
466  SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
467 
471  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
472  &activationDesc);
473 
474  if (status)
475  {
476  FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
477  baseLayer,
478  activationLayer,
479  activationDesc,
480  name);
481  untouched.erase(baseLayer->GetGuid());
482  untouched.erase(activationLayer->GetGuid());
483  }
484  }
485  }
486  }
487  }
488  }
489  }
490 
491  // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
492  if (base.GetType() == LayerType::Reduce)
493  {
494  ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
495  ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
496 
497  if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
498  {
499  // Add new layers to the graph and connect them.
500  std::vector<Layer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
501  baseLayer,
502  reduceDescriptor);
503 
504  // Replace existing baselayer with new subgraph.
505  ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
506  untouched.erase(baseLayer->GetGuid());
507  }
508  }
509  }
510 
511  if (optimizationViews.GetSubstitutions().empty())
512  {
513  optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
514  }
515  else
516  {
517  ReportUntouchedLayers(optimizationViews, untouched);
518  }
519 
520  return optimizationViews;
521 }
522 
523 } // namespace armnn
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
static const FactoryId & GetIdStatic()
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
std::vector< OptimizationPtr > Optimizations
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
const Parameters & GetParameters() const
This layer represents a depthwise convolution 2d operation.
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
std::shared_ptr< ConstTensorHandle > m_Mean
A unique pointer to store Mean values.
std::shared_ptr< ClBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: ClBackend.hpp:291
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: ClBackend.cpp:50
This layer represents a reduction operation.
Definition: ReduceLayer.hpp:13
std::shared_ptr< ConstTensorHandle > m_Beta
A unique pointer to store Beta values.
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: ClBackend.cpp:147
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:200
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:316
std::unique_ptr< armnn::profiling::IBackendProfiling > IBackendProfilingPtr
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:224
This layer represents a fully connected operation.
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
A ReduceDescriptor for the REDUCE operators.
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: ClBackend.cpp:184
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: ClBackend.cpp:141
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:265
Status
enumeration
Definition: Types.hpp:29
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:55
std::shared_ptr< ConstTensorHandle > m_Gamma
A unique pointer to store Gamma values.
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:25
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< ConstTensorHandle > m_Variance
A unique pointer to store Variance values.
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: ClBackend.cpp:59
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
This layer represents a subtraction operation.
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:245
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: ClBackend.cpp:206
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: ClBackend.cpp:189
bool m_UsingCustomAllocator
Definition: ClBackend.hpp:292
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:246
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:311
This layer represents a convolution 2d operation.
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
This layer represents a multiplication operation.
IBackendInternal::Optimizations GetOptimizations() const override
Definition: ClBackend.cpp:195
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:63
static const BackendId & GetIdStatic()
Definition: ClBackend.cpp:44
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< armnn::profiling::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we&#39;ll keep it in the backend namespace...
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:342
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:322
std::unique_ptr< IBackendContext > IBackendContextPtr