NeonBackend.cpp (ArmNN 21.11)
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackend.hpp"
#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonWorkloadFactory.hpp"
#include "NeonLayerSupport.hpp"
#include "NeonTensorHandleFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/NeonAdditionWorkload.hpp"
#include "workloads/NeonBatchNormalizationWorkload.hpp"
#include "workloads/NeonConvolution2dWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonMultiplicationWorkload.hpp"
#include "workloads/NeonReduceWorkload.hpp"
#include "workloads/NeonSubtractionWorkload.hpp"
#include <backendsCommon/DefaultAllocator.hpp>

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>
namespace armnn
{

const BackendId& NeonBackend::GetIdStatic()
{
    static const BackendId s_Id{NeonBackendId()};
    return s_Id;
}
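
// Pools tensor memory through ACL's Allocator; MemoryAffinity::Offset places
// tensors at offsets within shared backing allocations rather than in separate blobs.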
IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
{
    return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                               BaseMemoryManager::MemoryAffinity::Offset);
}
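
// The workload factory turns IR layers into Neon workloads; when ModelOptions are
// supplied they travel with the factory as a backend-specific model context.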
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}
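
// These overloads additionally register the shared memory manager and the
// NeonTensorHandleFactory with the registry before creating the workload factory.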
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
    return IBackendContextPtr{};
}

IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}
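
// Walks the subgraph twice, back to front: the first pass records every layer as
// untouched; the second pass substitutes patterns this backend can accelerate
// (activation fusion, multi-axis Reduce splitting) and erases the layers it replaces.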
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
{
    OptimizationViews optimizationViews;

    auto it = subgraph.end();
    std::map<LayerGuid, Layer*> untouched;

    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;
        untouched.insert({base.GetGuid(), &base});
    }

    it = subgraph.end();
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();
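
                            // Each branch below calls the corresponding ACL validate
                            // function with the activation descriptor attached; the
                            // fusion is only recorded when validation succeeds.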
                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        false,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
                                                                                      baseLayer,
                                                                                      activationLayer,
                                                                                      activationDesc,
                                                                                      name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                        PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        biases,
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                               baseLayer,
                                                                                               activationLayer,
                                                                                               activationDesc,
                                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Weight->GetTensorInfo(),
                                        baseLayer->m_Bias->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
                                                                                       baseLayer,
                                                                                       activationLayer,
                                                                                       activationDesc,
                                                                                       name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                        PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->m_Mean->GetTensorInfo(),
                                        baseLayer->m_Variance->GetTensorInfo(),
                                        baseLayer->m_Beta->GetTensorInfo(),
                                        baseLayer->m_Gamma->GetTensorInfo(),
                                        baseLayer->GetParameters(),
                                        &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                            FuseLayerWithParameters<BatchNormalizationLayer>(
                                                optimizationViews,
                                                baseLayer,
                                                activationLayer,
                                                activationDesc,
                                                name);

                                    replacementLayer->m_Beta = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
                                                                                    baseLayer,
                                                                                    activationLayer,
                                                                                    activationDesc,
                                                                                    name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                        baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                        activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                        &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }

        // Separate a reduce layer with multiple axes into multiple reduce layers with one axis each.
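        // e.g. a Reduce over axes {0, 2} becomes two chained single-axis Reduce
        // layers, since the underlying ACL reduction kernel handles one axis at a time.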
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<Layer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                            baseLayer,
                                                                            reduceDescriptor);

                // Replace the existing base layer with the new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }
    }

    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId>{ NeonTensorHandleFactory::GetIdStatic() };
}
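
// Registers the pooled memory manager and the Neon tensor handle factory directly
// with the runtime's registry, as an alternative to going through CreateWorkloadFactory.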
void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
}
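
// Default CPU allocator returned to callers that do not supply a custom allocator.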
std::unique_ptr<ICustomAllocator> NeonBackend::GetDefaultAllocator() const
{
    return std::make_unique<DefaultAllocator>();
}

} // namespace armnn