ArmNN
 22.05.01
NeonBackend.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NeonBackend.hpp"
7 #include "NeonBackendId.hpp"
10 #include "NeonLayerSupport.hpp"
12 
14 #include <armnn/Descriptors.hpp>
15 
19 
22 
24 
35 
36 #include <Optimizer.hpp>
37 
38 #include <arm_compute/core/Types.h>
39 #include <arm_compute/runtime/Allocator.h>
40 
41 namespace armnn
42 {
43 
// NeonBackend::GetIdStatic (signature line 44 is missing from this extract;
// the index below gives "static const BackendId & GetIdStatic() ...
// Definition: NeonBackend.cpp:44"): returns the process-wide BackendId that
// identifies the Neon backend, built once from NeonBackendId().
45 {
46  static const BackendId s_Id{NeonBackendId()};
47  return s_Id;
48 }
49 
// NeonBackend::CreateMemoryManager (signature line 50 missing from this
// extract): builds a NeonMemoryManager backed by an arm_compute::Allocator.
// NOTE(review): the second make_unique argument (original line 53) is missing
// from this extract -- confirm against the full source.
51 {
52  return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
54 }
55 
// NeonBackend::CreateWorkloadFactory overload taking a shared memory manager
// (signature start, line 56, missing from this extract): downcasts the shared
// IMemoryManager to NeonMemoryManager and hands it to the workload factory.
57  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
58 {
59  return std::make_unique<NeonWorkloadFactory>(
60  PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
61 }
62 
// NeonBackend::CreateWorkloadFactory overload taking a memory manager plus
// ModelOptions (signature start, line 63, missing from this extract): as the
// simpler overload, but also passes a backend-specific model context built
// from the given ModelOptions into the workload factory.
64  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
65 {
66  return std::make_unique<NeonWorkloadFactory>(
67  PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
68 }
69 
// NeonBackend::CreateWorkloadFactory overload taking a TensorHandleFactoryRegistry
// (signature start, line 70, and the allocator's second constructor argument,
// line 74, are missing from this extract): registers the Neon memory manager
// and the Neon tensor handle factory with the registry, then builds the
// workload factory on top of that same memory manager.
71  class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
72 {
73  auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
75 
76  tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
77 
78  auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
79  // Register copy and import factory pair
80  tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
81  // Register the factory
82  tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
83 
84 
85  return std::make_unique<NeonWorkloadFactory>(
86  PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
87 }
88 
// NeonBackend::CreateWorkloadFactory overload taking a registry plus
// ModelOptions (signature start, line 89, and the allocator's second
// constructor argument, line 93, are missing from this extract): identical to
// the registry-only overload, except the workload factory also receives a
// backend-specific model context built from the ModelOptions.
90  TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
91 {
92  auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
94 
95  tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
96 
97  auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
98  // Register copy and import factory pair
99  tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
100  // Register the factory
101  tensorHandleFactoryRegistry.RegisterFactory(std::move(factory));
102 
103  return std::make_unique<NeonWorkloadFactory>(
104  PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
105 }
106 
// NeonBackend::CreateBackendContext (signature line 107 missing from this
// extract): returns an empty pointer -- this backend supplies no runtime
// backend context.
108 {
109  return IBackendContextPtr{};
110 }
111 
// NeonBackend::CreateBackendProfilingContext fragment: the signature (lines
// 112-113) and the body's return statement (line 115) are missing from this
// extract, so only the braces are visible -- consult the full source before
// editing.
114 {
116 }
117 
// NeonBackend::CreateBackendSpecificModelContext fragment: the signature start
// (line 118) and the return statement (line 121) are missing from this
// extract. Presumably it constructs a NeonBackendModelContext from the given
// ModelOptions (see the index below) -- confirm against the full source.
119  const ModelOptions& modelOptions) const
120 {
122 }
123 
// NeonBackend::GetLayerSupport() (signature line 124 missing from this
// extract): returns a lazily-constructed, function-local static ILayerSupport
// instance shared by all callers. The constructing expression (line 128,
// presumably a new NeonLayerSupport) is missing from this extract.
125 {
126  static ILayerSupportSharedPtr layerSupport
127  {
129  };
130  return layerSupport;
131 }
132 
// NeonBackend::GetLayerSupport(ModelOptions) overload (signature line 133
// missing from this extract): same function-local static pattern as the
// no-argument overload; the constructing expression (line 137) is missing
// from this extract. NOTE(review): a static here means the first caller's
// options win for all subsequent callers -- confirm intent in the full source.
135 {
136  static ILayerSupportSharedPtr layerSupport
137  {
138  };
139  return layerSupport;
140 }
141 
// NeonBackend::OptimizeSubgraphView (signature start, line 142, missing from
// this extract): walks the subgraph backwards, fusing a following Activation
// layer into supported producer layers (Conv2d, DepthwiseConv2d,
// FullyConnected, BatchNorm, Addition, Division, Multiplication, Subtraction)
// and splitting multi-axis Reduce layers into chains of single-axis reduces.
// NOTE(review): several original lines (164-166 and each Neon*WorkloadValidate
// status assignment) are missing from this extract; comments on those spots
// below are hedged accordingly.
143  const ModelOptions& modelOptions) const
144 {
145  OptimizationViews optimizationViews(modelOptions);
146 
 // First pass: record every layer as "untouched"; fused/replaced layers are
 // erased from this map so the remainder can be reported at the end.
147  auto it = subgraph.endIConnectable();
148  std::map<LayerGuid, Layer*> untouched;
149 
150  while (it != subgraph.beginIConnectable())
151  {
152  --it;
153  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
154  untouched.insert({base.GetGuid(), &base});
155  }
156 
 // Second pass: iterate backwards again and attempt optimizations.
157  it = subgraph.endIConnectable();
158  while (it != subgraph.beginIConnectable())
159  {
160  --it;
161  Layer& base = *(PolymorphicDowncast<Layer*>(*it));
162 
163  // Fuse activation into previous layer if supported by backend
 // (the condition's opening lines 164-166 are missing from this extract; the
 // visible tail covers Subtraction/Division and additionally requires that
 // the layer does not already carry a fused ActivationDescriptor)
167  || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
168  && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
169  {
 // Only consider fusing when an output slot feeds exactly one consumer.
170  for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
171  {
172  if (output->GetNumConnections() == 1)
173  {
174  for (auto&& childInput : output->GetConnections())
175  {
176  if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
177  (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
178  {
179  Layer& child = childInput->GetOwningLayer();
180 
181  auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);
182 
 // Name of the replacement layer records both fused originals.
183  const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
184  base.GetName();
185 
186  // Get params from activation layer
187  ActivationDescriptor activationDesc = activationLayer->GetParameters();
188 
189  if (base.GetType() == LayerType::Convolution2d)
190  {
191  Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);
192 
 // Bias is optional: only fetch its TensorInfo when enabled.
193  Optional<TensorInfo> biases;
194 
195  if (baseLayer->GetParameters().m_BiasEnabled)
196  {
197  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
198  }
199 
 // Validate the fused conv+activation. The status assignment (lines
 // 200-201, 204) is missing from this extract; presumably a
 // NeonConvolution2dWorkloadValidate call -- see the index below.
202  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
203  baseLayer->GetParameters(),
205  biases,
206  false,
207  &activationDesc);
208 
209  if (status)
210  {
211  FuseConvolution2dLayer<Convolution2dLayer>(optimizationViews,
212  baseLayer,
213  activationLayer,
214  activationDesc,
215  name);
216  untouched.erase(baseLayer->GetGuid());
217  untouched.erase(activationLayer->GetGuid());
218  }
219  }
220  else if (base.GetType() == LayerType::DepthwiseConvolution2d)
221  {
222  DepthwiseConvolution2dLayer* baseLayer =
223  PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);
224 
225  Optional<TensorInfo> biases;
226 
227  if (baseLayer->GetParameters().m_BiasEnabled)
228  {
229  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
230  }
231 
 // Validation call's opening lines (232-233, 236) missing from extract;
 // presumably NeonDepthwiseConvolutionWorkloadValidate -- see index.
234  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
235  baseLayer->GetParameters(),
237  biases,
238  &activationDesc);
239 
240  if (status)
241  {
242  FuseDepthwiseConvolution2dLayer<DepthwiseConvolution2dLayer>(optimizationViews,
243  baseLayer,
244  activationLayer,
245  activationDesc,
246  name);
247  untouched.erase(baseLayer->GetGuid());
248  untouched.erase(activationLayer->GetGuid());
249  }
250  }
251  else if (base.GetType() == LayerType::FullyConnected)
252  {
253  FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);
254  FullyConnectedDescriptor descriptor = baseLayer->GetParameters();
255 
256  // As bias is optional only try to get TensorInfo from input if bias is enabled.
257  Optional<TensorInfo> biases;
258  if (descriptor.m_BiasEnabled)
259  {
260  biases = baseLayer->GetInputSlot(2).GetConnectedOutputSlot()->GetTensorInfo();
261  }
262 
 // Validation call's opening lines (263-264, 266) missing from extract;
 // presumably NeonFullyConnectedWorkloadValidate -- see index.
265  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
267  biases,
268  baseLayer->GetParameters(),
269  &activationDesc);
270 
271  if (status)
272  {
273  FuseFullyConnectedLayer<FullyConnectedLayer>(optimizationViews,
274  baseLayer,
275  activationLayer,
276  activationDesc,
277  name);
278  untouched.erase(baseLayer->GetGuid());
279  untouched.erase(activationLayer->GetGuid());
280  }
281  }
282  else if (base.GetType() == LayerType::BatchNormalization)
283  {
284  BatchNormalizationLayer* baseLayer =
285  PolymorphicDowncast<BatchNormalizationLayer*>(&base);
286 
 // Validation call's opening lines (287-288) missing from extract;
 // presumably NeonBatchNormalizationValidate -- see index.
289  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
290  baseLayer->m_Mean->GetTensorInfo(),
291  baseLayer->m_Variance->GetTensorInfo(),
292  baseLayer->m_Beta->GetTensorInfo(),
293  baseLayer->m_Gamma->GetTensorInfo(),
294  baseLayer->GetParameters(),
295  &activationDesc);
296 
297  if (status)
298  {
299  BatchNormalizationLayer* replacementLayer =
300  FuseBatchNormalizationLayer<BatchNormalizationLayer>(optimizationViews,
301  baseLayer,
302  activationLayer,
303  activationDesc,
304  name);
305 
 // Transfer ownership of the constant tensors to the fused layer.
306  replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
307  replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
308  replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
309  replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
310  untouched.erase(baseLayer->GetGuid());
311  untouched.erase(activationLayer->GetGuid());
312  }
313  }
314  else if (base.GetType() == LayerType::Addition)
315  {
316  AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);
317 
 // Validation call's opening lines (318-320) missing from extract;
 // presumably NeonAdditionWorkloadValidate -- see index.
321  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
322  &activationDesc);
323 
324  if (status)
325  {
326  FuseAdditionLayer<AdditionLayer>(optimizationViews,
327  baseLayer,
328  activationLayer,
329  activationDesc,
330  name);
331  untouched.erase(baseLayer->GetGuid());
332  untouched.erase(activationLayer->GetGuid());
333  }
334  }
335  else if (base.GetType() == LayerType::Division)
336  {
337  DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);
338 
 // Validation call's opening lines (339-341) missing from extract;
 // presumably NeonDivisionWorkloadValidate -- see index.
342  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
343  &activationDesc);
344 
345  if (status)
346  {
347  FuseDivisionLayer<DivisionLayer>(optimizationViews,
348  baseLayer,
349  activationLayer,
350  activationDesc,
351  name);
352  untouched.erase(baseLayer->GetGuid());
353  untouched.erase(activationLayer->GetGuid());
354  }
355  }
356  else if (base.GetType() == LayerType::Multiplication)
357  {
358  MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);
359 
 // Validation call's opening lines (360-362) missing from extract;
 // presumably NeonMultiplicationWorkloadValidate -- see index.
363  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
364  &activationDesc);
365 
366  if (status)
367  {
368  FuseMultiplicationLayer<MultiplicationLayer>(optimizationViews,
369  baseLayer,
370  activationLayer,
371  activationDesc,
372  name);
373  untouched.erase(baseLayer->GetGuid());
374  untouched.erase(activationLayer->GetGuid());
375  }
376  }
377  else if (base.GetType() == LayerType::Subtraction)
378  {
379  SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);
380 
 // Validation call's opening lines (381-383) missing from extract;
 // presumably NeonSubtractionWorkloadValidate -- see index.
384  activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
385  &activationDesc);
386 
387  if (status)
388  {
389  FuseSubtractionLayer<SubtractionLayer>(optimizationViews,
390  baseLayer,
391  activationLayer,
392  activationDesc,
393  name);
394  untouched.erase(baseLayer->GetGuid());
395  untouched.erase(activationLayer->GetGuid());
396  }
397  }
398  }
399  }
400  }
401  }
402  }
403 
404  // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
405  if (base.GetType() == LayerType::Reduce)
406  {
407  ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
408  ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
409 
410  if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
411  {
412  // Add new layers to the graph and connect them.
413  std::vector<IConnectableLayer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
414  baseLayer,
415  reduceDescriptor);
416 
417  // Replace existing baselayer with new subgraph.
418  ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
419  untouched.erase(baseLayer->GetGuid());
420  }
421  }
422  }
423 
 // If nothing was substituted, report the whole subgraph untouched; otherwise
 // report only the layers still left in the map.
424  if (optimizationViews.GetSubstitutions().empty())
425  {
426  optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
427  }
428  else
429  {
430  ReportUntouchedLayers(optimizationViews, untouched);
431  }
432 
433  return optimizationViews;
434 }
435 
436 std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
437 {
438  return std::vector<ITensorHandleFactory::FactoryId>() = { NeonTensorHandleFactory::GetIdStatic() };
439 }
440 
// NeonBackend::RegisterTensorHandleFactories (signature line 441 and the
// allocator's second constructor argument, line 444, are missing from this
// extract): registers the Neon memory manager, the copy/import factory pair,
// and the Neon tensor handle factory with the supplied registry. Mirrors the
// registration done in the registry-taking CreateWorkloadFactory overloads.
442 {
443  auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
445 
446  registry.RegisterMemoryManager(memoryManager);
447 
448  auto factory = std::make_unique<NeonTensorHandleFactory>(memoryManager);
449  // Register copy and import factory pair
450  registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId());
451  // Register the factory
452  registry.RegisterFactory(std::move(factory));
453 }
454 
455 std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
456 {
457  return std::make_unique<DefaultAllocator>();
458 }
459 
460 
461 } // namespace armnn
bool m_BiasEnabled
Enable/disable bias.
void RegisterMemoryManager(std::shared_ptr< IMemoryManager > memoryManger)
Register a memory manager with shared ownership.
IConnectableLayerIterator endIConnectable()
This layer represents a batch normalization operation.
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
bool m_BiasEnabled
Enable/disable bias.
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
arm_compute::Status NeonBatchNormalizationValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &mean, const TensorInfo &var, const TensorInfo &beta, const TensorInfo &gamma, const BatchNormalizationDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
This layer represents a depthwise convolution 2d operation.
std::vector< BackendOptions > ModelOptions
void RegisterFactory(std::unique_ptr< ITensorHandleFactory > allocator)
Register a TensorHandleFactory and transfer ownership.
void ReportUntouchedLayers(OptimizationViews &optimizationViews, std::map< LayerGuid, Layer *> untouched)
arm_compute::Status NeonDepthwiseConvolutionWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const DepthwiseConvolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, const ActivationDescriptor *activationDescriptor)
IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: NeonBackend.cpp:56
constexpr const char * NeonBackendId()
IConnectableLayerIterator beginIConnectable()
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
arm_compute::Status NeonFullyConnectedWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const TensorInfo &weights, const Optional< TensorInfo > &biases, const FullyConnectedDescriptor &descriptor, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< ConstTensorHandle > m_Mean
A shared pointer to store Mean values.
Copyright (c) 2021 ARM Limited and Contributors.
const Parameters & GetParameters() const override
If the layer has a descriptor return it.
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
arm_compute::Status NeonAdditionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
This layer represents a reduction operation.
Definition: ReduceLayer.hpp:13
The NeonBackendModelContext is used to pass in Neon specific backend ModelOptions.
std::shared_ptr< ConstTensorHandle > m_Beta
A shared pointer to store Beta values.
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
The SubgraphView class represents a subgraph of a Graph.
arm_compute::Status NeonSubtractionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
void RegisterCopyAndImportFactoryPair(ITensorHandleFactory::FactoryId copyFactoryId, ITensorHandleFactory::FactoryId importFactoryId)
Register a pair of TensorHandleFactory Id for Memory Copy and TensorHandleFactory Id for Memory Impor...
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:322
This layer represents a fully connected operation.
std::shared_ptr< IBackendModelContext > IBackendSpecificModelContextPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
A ReduceDescriptor for the REDUCE operators.
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
A FullyConnectedDescriptor for the FullyConnectedLayer.
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:271
Status
enumeration
Definition: Types.hpp:42
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
IBackendInternal::IBackendSpecificModelContextPtr CreateBackendSpecificModelContext(const ModelOptions &modelOptions) const override
std::shared_ptr< ConstTensorHandle > m_Gamma
A shared pointer to store Gamma values.
arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo &input, const TensorInfo &output, const Convolution2dDescriptor &descriptor, const TensorInfo &weights, const Optional< TensorInfo > &biases, bool isFastMathEnabled, const ActivationDescriptor *activationDescriptor)
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
void AddUntouchedSubgraph(SubgraphView &&subgraph)
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling; we'll keep it in the backend namespace...
std::shared_ptr< ConstTensorHandle > m_Variance
A shared pointer to store Variance values.
arm_compute::Status NeonDivisionWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
This layer represents an addition operation.
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
const Substitutions & GetSubstitutions() const
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
This layer represents a subtraction operation.
std::vector< OutputSlot >::iterator BeginOutputSlots()
Definition: Layer.hpp:251
This layer represents a division operation.
std::vector< OutputSlot >::iterator EndOutputSlots()
Definition: Layer.hpp:252
static const BackendId & GetIdStatic()
Definition: NeonBackend.cpp:44
static const FactoryId & GetIdStatic()
const char * GetName() const override
Returns the name of the layer.
Definition: Layer.hpp:317
This layer represents a convolution 2d operation.
This layer represents a multiplication operation.
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
arm_compute::Status NeonMultiplicationWorkloadValidate(const TensorInfo &input0, const TensorInfo &input1, const TensorInfo &output, const ActivationDescriptor *activationDescriptor)
std::shared_ptr< T > GetAdditionalInformation() const
Definition: Layer.hpp:353
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: NeonBackend.cpp:50
LayerGuid GetGuid() const final
Returns the unique id of the layer.
Definition: Layer.hpp:328
std::unique_ptr< IBackendContext > IBackendContextPtr