ArmNN
 21.05
ClBackend.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "ClBackend.hpp"
7 #include "ClBackendContext.hpp"
8 #include "ClBackendId.hpp"
11 #include "ClLayerSupport.hpp"
13 #include "ClWorkloadFactory.hpp"
14 
16 #include <armnn/Descriptors.hpp>
17 
21 
25 
34 
35 #include <Optimizer.hpp>
36 
37 #include <arm_compute/core/Types.h>
38 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
39 
40 namespace armnn
41 {
42 
44 {
45  static const BackendId s_Id{ClBackendId()};
46  return s_Id;
47 }
48 
50 {
51  return std::make_unique<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
52 }
53 
55  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
56 {
57  return std::make_unique<ClWorkloadFactory>(
58  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
59 }
60 
62  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
63 {
64  return std::make_unique<ClWorkloadFactory>(
65  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
66 }
67 
69  TensorHandleFactoryRegistry& registry) const
70 {
71  auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
72 
73  registry.RegisterMemoryManager(memoryManager);
74  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
75  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
76  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
77 
78  return std::make_unique<ClWorkloadFactory>(
79  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
80 }
81 
83  TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
84 {
85  auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
86 
87  registry.RegisterMemoryManager(memoryManager);
88  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
89  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
90  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
91 
92  return std::make_unique<ClWorkloadFactory>(
93  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
94 }
95 
98  const ModelOptions& modelOptions,
99  MemorySourceFlags inputFlags,
100  MemorySourceFlags outputFlags) const
101 {
102  auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
103 
104  registry.RegisterMemoryManager(memoryManager);
105  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
106  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
107 
108  return std::make_unique<ClWorkloadFactory>(
109  PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
110 }
111 
112 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
113 {
114  return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic(),
116 }
117 
119 {
120  auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
121 
122  registry.RegisterMemoryManager(mgr);
123  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
124  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(
125  static_cast<MemorySourceFlags>(MemorySource::Malloc), static_cast<MemorySourceFlags>(MemorySource::Malloc)));
126 }
127 
129  MemorySourceFlags inputFlags,
130  MemorySourceFlags outputFlags)
131 {
132  auto mgr = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
133 
134  registry.RegisterMemoryManager(mgr);
135  registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(mgr));
136  registry.RegisterFactory(std::make_unique<ClImportTensorHandleFactory>(inputFlags, outputFlags));
137 }
138 
140 {
141  return IBackendContextPtr{new ClBackendContext{options}};
142 }
143 
146 {
148 }
149 
151 {
152  return Optimizations{};
153 }
154 
156  const ModelOptions& modelOptions) const
157 {
158  return IBackendSpecificModelContextPtr{new ClBackendModelContext{modelOptions}};
159 }
160 
162 {
163  static ILayerSupportSharedPtr layerSupport
164  {
166  };
167  return layerSupport;
168 }
169 
171 {
172  static ILayerSupportSharedPtr layerSupport
173  {
175  };
176  return layerSupport;
177 }
178 
179 bool ClBackend::HasCapability(BackendCapability capabilityClass) const
180 {
181  auto search = gpuAccCapabilities.find(capabilityClass);
182  if (search != gpuAccCapabilities.end())
183  {
184  return true;
185  }
186  return false;
187 }
188 
190  const ModelOptions& modelOptions) const
191 {
192  OptimizationViews optimizationViews;
193 
194  auto it = subgraph.end();
195  bool isFastMathEnabled = false;
196  std::map<LayerGuid, Layer*> untouched;
197 
198  while (it != subgraph.begin())
199  {
200  --it;
201  Layer& base = **it;
202  untouched.insert({base.GetGuid(), &base});
203  }
204 
205  it = subgraph.end();
206 #if defined(ARMCOMPUTECL_ENABLED)
208 
209  if (modelContextPtr)
210  {
211  auto clModelOptions = dynamic_cast<ClBackendModelContext*>(modelContextPtr.get());
212  if (clModelOptions)
213  {
214  isFastMathEnabled = clModelOptions->IsFastMathEnabled();
215  }
216  }
217 #endif
218  while (it != subgraph.begin())
219  {
220  --it;
221  Layer& base = **it;
222 
226  || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
227  && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
228  {
229  for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
230  {
231  if (output->GetNumConnections() == 1)
232  {
233  for (auto&& childInput : output->GetConnections())
234  {
235  if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
236  (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
237  {
238  Layer& child = childInput->GetOwningLayer();
239 
240  auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
241 
242  const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
243  base.GetName();
244 
245  // Get params from activation layer
246  ActivationDescriptor activationDesc = activationLayer->GetParameters();
247 
248  if (base.GetType() == LayerType::Convolution2d)
249  {
250  Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
251 
252  Optional<TensorInfo> biases;
253 
254  if (baseLayer->GetParameters().m_BiasEnabled)
255  {
256  biases = baseLayer->m_Bias->GetTensorInfo();
257  }
258 
261  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
262  baseLayer->GetParameters(),
263  baseLayer->m_Weight->GetTensorInfo(),
264  biases,
265  isFastMathEnabled,
266  &activationDesc);
267 
268  if (status)
269  {
270  FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
271  baseLayer,
272  activationLayer,
273  activationDesc,
274  name);
275  untouched.erase(baseLayer->GetGuid());
276  untouched.erase(activationLayer->GetGuid());
277  }
278  }
279  else if (base.GetType() == LayerType::DepthwiseConvolution2d)
280  {
281  DepthwiseConvolution2dLayer* baseLayer =
282  PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
283 
284  Optional<TensorInfo> biases;
285 
286  if (baseLayer->GetParameters().m_BiasEnabled)
287  {
288  biases = baseLayer->m_Bias->GetTensorInfo();
289  }
290 
293  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
294  baseLayer->GetParameters(),
295  baseLayer->m_Weight->GetTensorInfo(),
296  biases,
297  &activationDesc);
298 
299  if (status)
300  {
301  FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
302  baseLayer,
303  activationLayer,
304  activationDesc,
305  name);
306  untouched.erase(baseLayer->GetGuid());
307  untouched.erase(activationLayer->GetGuid());
308  }
309  }
310  else if (base.GetType() == LayerType::FullyConnected)
311  {
312  FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
313 
316  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
317  baseLayer->m_Weight->GetTensorInfo(),
318  baseLayer->m_Bias->GetTensorInfo(),
319  baseLayer->GetParameters(),
320  &activationDesc);
321 
322  if (status)
323  {
324  FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
325  baseLayer,
326  activationLayer,
327  activationDesc,
328  name);
329  untouched.erase(baseLayer->GetGuid());
330  untouched.erase(activationLayer->GetGuid());
331  }
332  }
333  else if (base.GetType() == LayerType::BatchNormalization)
334  {
335  BatchNormalizationLayer* baseLayer =
336  PolymorphicDowncast<BatchNormalizationLayer*>(&base);
337 
340  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
341  baseLayer->m_Mean->GetTensorInfo(),
342  baseLayer->m_Variance->GetTensorInfo(),
343  baseLayer->m_Beta->GetTensorInfo(),
344  baseLayer->m_Gamma->GetTensorInfo(),
345  baseLayer->GetParameters(),
346  &activationDesc);
347 
348  if (status)
349  {
350  BatchNormalizationLayer* replacementLayer =
351  FuseLayerWithParameters<BatchNormalizationLayer>(optimizationViews,
352  baseLayer,
353  activationLayer,
354  activationDesc,
355  name);
356 
357  replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
358  replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
359  replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
360  replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
361  untouched.erase(baseLayer->GetGuid());
362  untouched.erase(activationLayer->GetGuid());
363  }
364  }
365  else if (base.GetType() == LayerType::Addition)
366  {
367  AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
368 
372  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
373  &activationDesc);
374 
375  if (status)
376  {
377  FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
378  baseLayer,
379  activationLayer,
380  activationDesc,
381  name);
382  untouched.erase(baseLayer->GetGuid());
383  untouched.erase(activationLayer->GetGuid());
384  }
385  }
386  else if (base.GetType() == LayerType::Division)
387  {
388  DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
389 
393  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
394  &activationDesc);
395 
396  if (status)
397  {
398  FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
399  baseLayer,
400  activationLayer,
401  activationDesc,
402  name);
403  untouched.erase(baseLayer->GetGuid());
404  untouched.erase(activationLayer->GetGuid());
405  }
406  }
407  else if (base.GetType() == LayerType::Multiplication)
408  {
409  MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
410 
414  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
415  &activationDesc);
416 
417  if (status)
418  {
419  FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
420  baseLayer,
421  activationLayer,
422  activationDesc,
423  name);
424  untouched.erase(baseLayer->GetGuid());
425  untouched.erase(activationLayer->GetGuid());
426  }
427  }
428  else if (base.GetType() == LayerType::Subtraction)
429  {
430  SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
431 
435  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
436  &activationDesc);
437 
438  if (status)
439  {
440  FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
441  baseLayer,
442  activationLayer,
443  activationDesc,
444  name);
445  untouched.erase(baseLayer->GetGuid());
446  untouched.erase(activationLayer->GetGuid());
447  }
448  }
449  }
450  }
451  }
452  }
453  }
454  }
455 
456  if (optimizationViews.GetSubstitutions().empty())
457  {
458  optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
459  }
460  else
461  {
462  ReportUntouchedLayers(optimizationViews, untouched);
463  }
464 
465  return optimizationViews;
466 }
467 
468 } // namespace armnn
arm_compute::Status ClAdditionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
arm_compute::Status ClFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const TensorInfo &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
static const FactoryId & GetIdStatic()
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
std::vector< OptimizationPtr > Optimizations
arm_compute::Status ClDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
const Parameters & GetParameters() const
This layer represents a depthwise convolution 2d operation.
constexpr const char * ClBackendId()
Definition: ClBackendId.hpp:10
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
arm_compute::Status ClSubtractionValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
std::shared_ptr< ConstTensorHandle > m_Mean
A unique pointer to store Mean values.
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: ClBackend.cpp:49
std::shared_ptr< ConstTensorHandle > m_Beta
A unique pointer to store Beta values.
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: ClBackend.cpp:118
The SubgraphView class represents a subgraph of a Graph.
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:155
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:316
std::unique_ptr< armnn::profiling::IBackendProfiling > IBackendProfilingPtr
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: ClBackend.cpp:189
This layer represents a fully connected operation.
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
BackendCapability
BackendCapability class.
Definition: Types.hpp:220
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: ClBackend.cpp:139
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
arm_compute::Status ClMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: ClBackend.cpp:112
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:265
Status
enumeration
Definition: Types.hpp:30
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:55
std::shared_ptr< ConstTensorHandle > m_Gamma
A unique pointer to store Gamma values.
const std::set< armnn::BackendCapability > gpuAccCapabilities
Definition: ClBackend.hpp:12
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:25
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< ConstTensorHandle > m_Variance
A unique pointer to store Variance values.
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: ClBackend.cpp:54
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
This layer represents a subtraction operation.
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:245
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: ClBackend.cpp:161
std::shared_ptr< ConstTensorHandle > m_Bias
A unique pointer to store Bias values.
std::shared_ptr< ConstTensorHandle > m_Weight
A unique pointer to store Weight values.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: ClBackend.cpp:144
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:246
arm_compute::Status ClBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &desc, const ActivationDescriptor *activationDescriptor)
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:311
This layer represents a convolution 2d operation.
bool HasCapability(BackendCapability capabilityClass) const override
Returns true if backend support the capability false otherwise.
Definition: ClBackend.cpp:179
The ClBackendModelContext is used to pass in CL specific backend ModelOptions.
This layer represents a multiplication operation.
IBackendInternal::Optimizations GetOptimizations() const override
Definition: ClBackend.cpp:150
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:63
static const BackendId & GetIdStatic()
Definition: ClBackend.cpp:43
arm_compute::Status ClDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< armnn::profiling::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace...
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:342
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:322
std::unique_ptr< IBackendContext > IBackendContextPtr