NeonBackend.cpp (ArmNN 21.08)
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NeonBackend.hpp"
#include "NeonBackendId.hpp"
#include "NeonBackendModelContext.hpp"
#include "NeonWorkloadFactory.hpp"
#include "NeonLayerSupport.hpp"
#include "NeonTensorHandleFactory.hpp"

#include <armnn/BackendRegistry.hpp>
#include <armnn/Descriptors.hpp>

#include <aclCommon/ArmComputeSubgraphUtils.hpp>
#include <aclCommon/ArmComputeUtils.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <armnn/backends/IBackendContext.hpp>
#include <armnn/backends/IMemoryManager.hpp>

#include <armnn/utility/PolymorphicDowncast.hpp>

#include "workloads/NeonAdditionWorkload.hpp"
#include "workloads/NeonBatchNormalizationWorkload.hpp"
#include "workloads/NeonConvolution2dWorkload.hpp"
#include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonMultiplicationWorkload.hpp"
#include "workloads/NeonReduceWorkload.hpp"
#include "workloads/NeonSubtractionWorkload.hpp"

#include <Optimizer.hpp>

#include <arm_compute/core/Types.h>
#include <arm_compute/runtime/Allocator.h>

namespace armnn
{

const BackendId& NeonBackend::GetIdStatic()
{
    static const BackendId s_Id{NeonBackendId()};
    return s_Id;
}

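// The backend's memory manager wraps ACL's Allocator; MemoryAffinity::Offset
// asks BaseMemoryManager for offset-based sharing of pooled allocations
// (see BaseMemoryManager.hpp for the two affinity modes).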
IBackendInternal::IMemoryManagerUniquePtr NeonBackend::CreateMemoryManager() const
{
    return std::make_unique<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                               BaseMemoryManager::MemoryAffinity::Offset);
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
{
    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

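// The registry-based overloads below create their own memory manager and register
// it alongside a NeonTensorHandleFactory, so tensor handles and workloads draw
// from the same pools.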
IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
}

IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));

    return std::make_unique<NeonWorkloadFactory>(
        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
}

IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
{
    return IBackendContextPtr{};
}

IBackendInternal::IBackendProfilingContextPtr NeonBackend::CreateBackendProfilingContext(
    const IRuntime::CreationOptions&, IBackendProfilingPtr&)
{
    return IBackendProfilingContextPtr{};
}

IBackendInternal::Optimizations NeonBackend::GetOptimizations() const
{
    return Optimizations{};
}

IBackendInternal::IBackendSpecificModelContextPtr NeonBackend::CreateBackendSpecificModelContext(
    const ModelOptions& modelOptions) const
{
    return IBackendSpecificModelContextPtr{new NeonBackendModelContext{modelOptions}};
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport() const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(IBackendInternal::IBackendSpecificModelContextPtr{})
    };
    return layerSupport;
}

IBackendInternal::ILayerSupportSharedPtr NeonBackend::GetLayerSupport(const ModelOptions& modelOptions) const
{
    static ILayerSupportSharedPtr layerSupport
    {
        new NeonLayerSupport(CreateBackendSpecificModelContext(modelOptions))
    };
    return layerSupport;
}

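// Backend-specific optimization pass: fuses eligible activation layers into the
// preceding layer and splits multi-axis Reduce layers into single-axis chains.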
OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph) const
{
    OptimizationViews optimizationViews;

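    // First pass: record every layer; anything still in this map at the end is
    // reported back as untouched.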
    auto it = subgraph.end();
    std::map<LayerGuid, Layer*> untouched;

    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;
        untouched.insert({base.GetGuid(), &base});
    }

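    // Second pass: walk the subgraph again, attempting a substitution at each layer.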
    it = subgraph.end();
    while (it != subgraph.begin())
    {
        --it;
        Layer& base = **it;

        // Fuse activation into previous layer if supported by backend
        if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
            || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
            || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
            && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
        {
            for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
            {
                if (output->GetNumConnections() == 1)
                {
                    for (auto&& childInput : output->GetConnections())
                    {
                        if ((childInput->GetOwningLayer().GetType() == LayerType::Activation) &&
                            (checkDataTypeInputandOutput(childInput->GetOwningLayer())))
                        {
                            Layer& child = childInput->GetOwningLayer();

                            auto* activationLayer = PolymorphicDowncast<ActivationLayer*>(&child);

                            const std::string name = std::string("fused-") + child.GetName() + std::string("-into-") +
                                                     base.GetName();

                            // Get params from activation layer
                            ActivationDescriptor activationDesc = activationLayer->GetParameters();

                            if (base.GetType() == LayerType::Convolution2d)
                            {
                                Convolution2dLayer* baseLayer = PolymorphicDowncast<Convolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonConvolution2dWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    false, // isFastMathEnabled
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<Convolution2dLayer>(optimizationViews,
                                                                                      baseLayer,
                                                                                      activationLayer,
                                                                                      activationDesc,
                                                                                      name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::DepthwiseConvolution2d)
                            {
                                DepthwiseConvolution2dLayer* baseLayer =
                                    PolymorphicDowncast<DepthwiseConvolution2dLayer*>(&base);

                                Optional<TensorInfo> biases;

                                if (baseLayer->GetParameters().m_BiasEnabled)
                                {
                                    biases = baseLayer->m_Bias->GetTensorInfo();
                                }

                                arm_compute::Status status = NeonDepthwiseConvolutionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    biases,
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<DepthwiseConvolution2dLayer>(optimizationViews,
                                                                                               baseLayer,
                                                                                               activationLayer,
                                                                                               activationDesc,
                                                                                               name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::FullyConnected)
                            {
                                FullyConnectedLayer* baseLayer = PolymorphicDowncast<FullyConnectedLayer*>(&base);

                                arm_compute::Status status = NeonFullyConnectedWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Weight->GetTensorInfo(),
                                    baseLayer->m_Bias->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithWeightsAndBiases<FullyConnectedLayer>(optimizationViews,
                                                                                       baseLayer,
                                                                                       activationLayer,
                                                                                       activationDesc,
                                                                                       name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::BatchNormalization)
                            {
                                BatchNormalizationLayer* baseLayer =
                                    PolymorphicDowncast<BatchNormalizationLayer*>(&base);

                                arm_compute::Status status = NeonBatchNormalizationValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->m_Mean->GetTensorInfo(),
                                    baseLayer->m_Variance->GetTensorInfo(),
                                    baseLayer->m_Beta->GetTensorInfo(),
                                    baseLayer->m_Gamma->GetTensorInfo(),
                                    baseLayer->GetParameters(),
                                    &activationDesc);

                                if (status)
                                {
                                    BatchNormalizationLayer* replacementLayer =
                                        FuseLayerWithParameters<BatchNormalizationLayer>(
                                            optimizationViews,
                                            baseLayer,
                                            activationLayer,
                                            activationDesc,
                                            name);

                                    replacementLayer->m_Beta     = std::move(baseLayer->m_Beta);
                                    replacementLayer->m_Gamma    = std::move(baseLayer->m_Gamma);
                                    replacementLayer->m_Mean     = std::move(baseLayer->m_Mean);
                                    replacementLayer->m_Variance = std::move(baseLayer->m_Variance);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Addition)
                            {
                                AdditionLayer* baseLayer = PolymorphicDowncast<AdditionLayer*>(&base);

                                arm_compute::Status status = NeonAdditionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<AdditionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Division)
                            {
                                DivisionLayer* baseLayer = PolymorphicDowncast<DivisionLayer*>(&base);

                                arm_compute::Status status = NeonDivisionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<DivisionLayer>(optimizationViews,
                                                                              baseLayer,
                                                                              activationLayer,
                                                                              activationDesc,
                                                                              name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Multiplication)
                            {
                                MultiplicationLayer* baseLayer = PolymorphicDowncast<MultiplicationLayer*>(&base);

                                arm_compute::Status status = NeonMultiplicationWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<MultiplicationLayer>(optimizationViews,
                                                                                    baseLayer,
                                                                                    activationLayer,
                                                                                    activationDesc,
                                                                                    name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                            else if (base.GetType() == LayerType::Subtraction)
                            {
                                SubtractionLayer* baseLayer = PolymorphicDowncast<SubtractionLayer*>(&base);

                                arm_compute::Status status = NeonSubtractionWorkloadValidate(
                                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
                                    activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
                                    &activationDesc);

                                if (status)
                                {
                                    FuseLayerWithoutParameters<SubtractionLayer>(optimizationViews,
                                                                                 baseLayer,
                                                                                 activationLayer,
                                                                                 activationDesc,
                                                                                 name);
                                    untouched.erase(baseLayer->GetGuid());
                                    untouched.erase(activationLayer->GetGuid());
                                }
                            }
                        }
                    }
                }
            }
        }

        // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
        if (base.GetType() == LayerType::Reduce)
        {
            ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
            ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();

            if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
            {
                // Add new layers to the graph and connect them.
                std::vector<Layer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
                                                                            baseLayer,
                                                                            reduceDescriptor);

                // Replace existing baselayer with new subgraph.
                ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
                untouched.erase(baseLayer->GetGuid());
            }
        }
    }

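    // If no substitution was made, return the whole subgraph as untouched;
    // otherwise report the layers that were not part of any substitution.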
    if (optimizationViews.GetSubstitutions().empty())
    {
        optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph));
    }
    else
    {
        ReportUntouchedLayers(optimizationViews, untouched);
    }

    return optimizationViews;
}

std::vector<ITensorHandleFactory::FactoryId> NeonBackend::GetHandleFactoryPreferences() const
{
    return std::vector<ITensorHandleFactory::FactoryId>{ NeonTensorHandleFactory::GetIdStatic() };
}
425 
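// Registers this backend's memory manager and tensor handle factory with the
// runtime's registry (the alternative to the CreateMemoryManager() path, per
// the IBackendInternal documentation).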
void NeonBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry)
{
    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
                                                             BaseMemoryManager::MemoryAffinity::Offset);

    registry.RegisterMemoryManager(memoryManager);
    registry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
}

} // namespace armnn
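Usage sketch (not part of the file above): the Neon backend is normally reached
through ArmNN's public API rather than constructed directly. The fragment below
is a minimal illustration; BuildMyNetwork() is a hypothetical helper assumed to
return an already-built INetwork. Optimize() is the public entry point that
assigns subgraphs to the CpuAcc backend and, for those subgraphs, ends up
invoking NeonBackend::OptimizeSubgraphView() defined above.

#include <armnn/ArmNN.hpp>

#include <utility>
#include <vector>

// Hypothetical helper, assumed to construct and return a network.
armnn::INetworkPtr BuildMyNetwork();

void RunOnNeon()
{
    armnn::IRuntime::CreationOptions options;
    armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);

    armnn::INetworkPtr network = BuildMyNetwork();

    // "CpuAcc" is the BackendId behind NeonBackendId(); subgraphs assigned to it
    // go through the activation-fusion and Reduce-splitting pass shown above.
    std::vector<armnn::BackendId> backends = { armnn::Compute::CpuAcc };
    armnn::IOptimizedNetworkPtr optNet =
        armnn::Optimize(*network, backends, runtime->GetDeviceSpec());

    armnn::NetworkId networkId;
    runtime->LoadNetwork(networkId, std::move(optNet));
}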