ArmNN 22.02
NeonBackend.cpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackend.hpp"
#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonWorkloadFactory.hpp"
#include "NeonLayerSupport.hpp"
#include "NeonTensorHandleFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>
#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>
#include <armnn/utility/PolymorphicDowncast.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <backendsCommon/DefaultAllocator.hpp>

#include <neon/workloads/NeonAdditionWorkload.hpp>
#include <neon/workloads/NeonBatchNormalizationWorkload.hpp>
#include <neon/workloads/NeonConvolution2dWorkload.hpp>
#include <neon/workloads/NeonDepthwiseConvolutionWorkload.hpp>
#include <neon/workloads/NeonDivisionWorkload.hpp>
#include <neon/workloads/NeonFullyConnectedWorkload.hpp>
#include <neon/workloads/NeonMultiplicationWorkload.hpp>
#include <neon/workloads/NeonReduceWorkload.hpp>
#include <neon/workloads/NeonSubtractionWorkload.hpp>

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>
namespace armnn
{

const BackendId& NeonBackend::GetIdStatic()
{
    static const BackendId s_Id{NeonBackendId()};
    return s_Id;
}

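// The backend's shared memory manager wraps the Arm Compute Library allocator,
// configured for offset-based memory affinity.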
IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
{
    return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                               BaseMemoryManager::MemoryAffinity::Offset);
}

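// Workload factory creation: each overload wires the Neon memory manager into a
// NeonWorkloadFactory. The ModelOptions overloads additionally attach a
// backend-specific model context so workloads can honour Neon options.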
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);

    auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
    // Register copy and import factory pair
    tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
    // Register the factory
    tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);

    auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
    // Register copy and import factory pair
    tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
    // Register the factory
    tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

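// Context creators: the Neon backend needs no dedicated runtime or profiling
// context, so those return empty pointers; only the model context is populated.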
IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
    return IBackendContextPtr{};
}

IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
}

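// Layer support queries are answered by a NeonLayerSupport instance cached in a
// function-local static and shared by all callers.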
IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

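// Backend-level graph optimisation. Walks the subgraph backwards recording every
// layer as 'untouched', then applies two rewrites: (1) fuse an Activation layer
// into the supported layer that feeds it, and (2) split a multi-axis Reduce into
// a chain of single-axis Reduce layers. Layers consumed by a substitution are
// erased from 'untouched'; anything left over is reported back unchanged.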
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
{
    OptimizationViews optimizationViews;

    auto it = subgraph.endIConnectable();
    std::map<LayerGuid, Layer*> untouched;

    while (it != subgraph.beginIConnectable())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));
        untouched.insert({base.GetGuid(), &base});
    }

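    // Second pass: attempt the rewrites, erasing substituted layers from 'untouched'.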
    it = subgraph.endIConnectable();
    while (it != subgraph.beginIConnectable())
    {
        --it;
        Layer& base = *(PolymorphicDowncast<Layer*>(*it));

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

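                            // For each supported base layer type, call the matching Arm
                            // Compute Library validate function; only rewrite the graph
                            // if the fused (layer + activation) workload is supported.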
                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    false,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
                                                                               baseLayer,
                                                                               activationLayer,
                                                                               activationDesc,
                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                    PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                                 baseLayer,
                                                                                                 activationLayer,
                                                                                                 activationDesc,
                                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                    PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Mean->GetTensorInfo(),
                                    baseLayer->m_Variance->GetTensorInfo(),
                                    baseLayer->m_Beta->GetTensorInfo(),
                                    baseLayer->m_Gamma->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                        FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
                                                                                             baseLayer,
                                                                                             activationLayer,
                                                                                             activationDesc,
                                                                                             name);

                                    replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseAdditionLayer<AdditionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseDivisionLayer<DivisionLayer>(optimizationViews,
                                                                     baseLayer,
                                                                     activationLayer,
                                                                     activationDesc,
                                                                     name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
                                                                           baseLayer,
                                                                           activationLayer,
                                                                           activationDesc,
                                                                           name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer            = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                                        baseLayer,
                                                                                        reduceDescriptor);

                // Replace the existing baseLayer with the new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }
    }

    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

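// NeonTensorHandleFactory is the only handle factory this backend provides, so
// the preference list contains just its id.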
432 
433 std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
434 {
435  return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
436 }
437 
439 {
440  auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
442 
443  registry.RegisterMemoryManager(memoryManager);
444 
445  auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
446  // Register copy and import factory pair
447  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
448  // Register the factory
449  registry.RegisterFactory(std::move(factory));
450 }
451 
452 std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
453 {
454  return std::make_unique<DefaultAllocator>();
455 }
456 
457 
458 } // namespace armnn
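
For context, a minimal sketch of how an application selects this backend through the public ArmNN API, assuming a prebuilt armnn::INetworkPtr. The helper name RunOnNeon and the 'network' parameter are illustrative; IRuntime::Create, Optimize and LoadNetwork are the standard public entry points, and error handling is omitted.

#include <armnn/ArmNN.hpp>

// Sketch: run a prebuilt network on the Neon backend ("CpuAcc").
void RunOnNeon(armnn::INetworkPtr network)
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    // Optimize() partitions the graph per backend; subgraphs assigned to
    // CpuAcc are rewritten by NeonBackend::OptimizeSubgraphView above.
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load the optimized network into the runtime ready for execution.
    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
}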