ArmNN
 24.02
NeonBackendOptimizationUtils.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
9 
10 namespace armnn
11 {
12 
13 // Changes shapes of the form [1, 1, ..., W] to [ W ]
15 {
16  unsigned int numDimensions = in.GetNumDimensions();
17  for (unsigned int i = 0; i < (numDimensions-1); ++i)
18  {
19  if (in.GetShape()[i] != 1)
20  {
21  return false;
22  }
23  }
24 
25  unsigned int w = in.GetShape()[numDimensions-1];
26  out = in;
27  out.SetShape({w});
28 
29  return true;
30 }
31 
//
// Build slot and tensor info lists for Add/Mul/Add replacement
//
// Appends one slot-index list per layer of the Add / Mul / Add [/ Relu] sequence to each
// of the two output vectors. Existing elements of the vectors are kept; new entries are
// pushed after them.
//
// handleReLu:            when true a fourth (Relu) entry is appended to both vectors and
//                        the {0} output entry moves from the second Add to the Relu.
// multipleOutputs:       when true the first Add gets the output slot list {0},
//                        otherwise an empty list.
// inputLayersSlotLists:  receives {0, 1} (Add), {1} (Mul), {1} (Add) and, with
//                        handleReLu, {} (Relu).
// outputLayersSlotLists: receives {0} or {} (Add), {} (Mul), then either
//                        {} (Add) + {0} (Relu) with handleReLu, or {0} (Add) without.
//
// SlotListType must be constructible from the braced lists {}, {0}, {1} and {0, 1}.
template<typename SlotListType>
void BuildAddMulAddSlotLists(bool handleReLu,
                             bool multipleOutputs,
                             std::vector<SlotListType>& inputLayersSlotLists,
                             std::vector<SlotListType>& outputLayersSlotLists)
{
    // Build input slot list
    inputLayersSlotLists.push_back({0, 1});     // Add
    inputLayersSlotLists.push_back({1});        // Mul
    inputLayersSlotLists.push_back({1});        // Add
    if (handleReLu)
    {
        inputLayersSlotLists.push_back({});     // Relu
    }

    // Build output slot list
    if (multipleOutputs)
    {
        outputLayersSlotLists.push_back({0});   // Add
    }
    else
    {
        outputLayersSlotLists.push_back({});    // Add
    }
    outputLayersSlotLists.push_back({});        // Mul
    if (handleReLu)
    {
        outputLayersSlotLists.push_back({});    // Add
        outputLayersSlotLists.push_back({0});   // Relu
    }
    else
    {
        outputLayersSlotLists.push_back({0});   // Add
    }
}
70 
71 inline void GetFusedName(Layer *layerList[4], std::string& fusedName)
72 {
73  // Build the fused name string
74  fusedName = "fused";
75  for (unsigned int layerIdx = 0; layerIdx< 4; ++layerIdx)
76  {
77  if (! layerList[layerIdx])
78  {
79  break;
80  }
81  fusedName += "-";
82  fusedName += layerList[layerIdx]->GetNameStr();
83  }
84 }
85 
// Collects the external input and output TensorInfos of an Add / Mul / Add [/ activation]
// layer sequence and reports whether the sequence has the expected connectivity.
//
// layerList:            entries 0..2 must be non-null (checked with
//                       ARMNN_THROW_INVALIDARG_IF_FALSE); entry 3 is optional and, when
//                       present, is downcast to ActivationLayer.
// numInputs/numOutputs: reset to 0, then incremented once per TensorInfo appended to
//                       inputInfos / outputInfos by this call.
// inputInfos:           appended to (not cleared) with the infos of inputs that do not
//                       come from the previous layer in the sequence.
// outputInfos:          appended to (not cleared) with one info per connection that leaves
//                       the sequence.
// activationDescriptor: set to the parameters of layerList[3] when it is non-null;
//                       otherwise left unchanged.
// fuseReLu:             set to true when layerList[3] is non-null.
//
// Returns false when the number of connected input slots is not 6, when inputInfos does not
// hold exactly 4 entries, or when outputInfos is empty; true otherwise.
//
// Side effect: a Constant layer feeding the third or fourth recorded input may have its
// output TensorInfo and m_LayerOutput rewritten with a collapsed shape. This happens before
// the validity checks, so it also applies when the function ends up returning false.
86 template<typename Type>
87 bool BuildAddMulAddTensorInfoLists(Type* layerList[4],
88  unsigned int& numInputs,
89  unsigned int& numOutputs,
90  std::vector<TensorInfo>& inputInfos,
91  std::vector<TensorInfo>& outputInfos,
92  const ActivationDescriptor*& activationDescriptor,
93  bool& fuseReLu)
94 {
95  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[0]);
96  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[1]);
97  ARMNN_THROW_INVALIDARG_IF_FALSE(layerList[2]);
98 
// NOTE(review): listing lines 99-101 are absent from this extract. The symbol index at the
// end of the page references IsSequenceLayerType, BinaryOperation::Add and
// BinaryOperation::Mul, none of which appear in the visible code - presumably the missing
// lines validate the layer types; confirm against the upstream header.
102 
103  fuseReLu = (layerList[3] != nullptr);
104  if (fuseReLu)
105  {
106  activationDescriptor = &PolymorphicDowncast<ActivationLayer *>(layerList[3])->GetParameters();
// NOTE(review): listing line 107 is absent, which leaves the next line with an unmatched
// closing parenthesis. It looks like the tail of a check that the activation function is
// ReLu or BoundedReLu - TODO confirm against the upstream header.
108  (activationDescriptor->m_Function == ActivationFunction::BoundedReLu));
109  }
110 
111  numInputs = 0;
112  numOutputs = 0;
113 
114  // Ensure that there are 6 input slots in the add/mul/add layers
115  // we are going to replace
116  unsigned int layerIdx = 0;
117  unsigned int inputSlotCount = 0;
118  for (layerIdx = 0; layerIdx < 3; ++layerIdx)
119  {
120  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumInputSlots(); ++slotIdx)
121  {
122  InputSlot* inputSlot = &layerList[layerIdx]->GetInputSlot(slotIdx);
123  OutputSlot* outputSlot = inputSlot->GetConnectedOutputSlot();
// Unconnected input slots are ignored entirely: they are neither recorded nor counted.
124  if (outputSlot)
125  {
126  if (layerIdx == 0)
127  {
128  // Always count the input connections of the first add
129  inputInfos.push_back(inputSlot->GetTensorInfo());
130  numInputs++;
131  }
132  else
133  {
134  // For subsequent layers, we skip connections to the previous layers in the counting
135  if (&outputSlot->GetOwningLayer() != layerList[layerIdx-1])
136  {
137  TensorInfo inputSlotInfo = inputSlot->GetTensorInfo();
// numInputs is the count recorded so far, so this branch applies to the third and fourth
// recorded inputs only.
138  if (numInputs == 2 || numInputs == 3)
139  {
140  // Workaround the broadcast optimization to collapse shapes such as
141  // [1, 1, 1, 2] to [2] as required by backend
// On success inputSlotInfo holds the collapsed info, and that is what gets recorded below
// regardless of whether the producer is a Constant layer.
142  if (CollapseLeadingUnitDimensions(inputSlot->GetTensorInfo(), inputSlotInfo))
143  {
// Same query as the one that produced outputSlot above.
144  OutputSlot* previousLayerSlot = inputSlot->GetConnectedOutputSlot();
145  if (previousLayerSlot)
146  {
147  if (previousLayerSlot->GetOwningLayer().GetType() == LayerType::Constant)
148  {
149  // First update the TensorInfo in the constant owning layer
150  previousLayerSlot->SetTensorInfo(inputSlotInfo);
151  // Then update the TensorInfo in the workload for the owning layer
152  ConstantLayer* layer = PolymorphicDowncast<ConstantLayer*>(
153  &previousLayerSlot->GetOwningLayer());
// Builds a new handle from the existing constant data pointer paired with the collapsed
// TensorInfo, and stores it in place of the old handle.
154  layer->m_LayerOutput
155  = std::make_unique<ScopedTensorHandle>(
156  ConstTensor(inputSlotInfo,
157  layer->m_LayerOutput.get()->GetConstTensor<void>()));
158  }
159  }
160  }
161  }
162  inputInfos.push_back(inputSlotInfo);
163  numInputs++;
164  }
165  }
// Counts every connected input slot, including those fed by the previous layer.
166  inputSlotCount++;
167  }
168  }
169  }
170 
171  // Check the input counts
// inputInfos.size() includes any entries the caller passed in, because the vector is not
// cleared by this function.
172  bool validInputCount = (inputSlotCount == 6) && (inputInfos.size() == 4);
173  if (! validInputCount)
174  {
175  return false;
176  }
177 
// The activation layer is included in the output scan only when it is present.
178  const unsigned int maxIdx = (fuseReLu) ? 4 : 3;
179  for (layerIdx = 0; layerIdx < maxIdx; ++layerIdx)
180  {
181  for (unsigned int slotIdx = 0; slotIdx < layerList[layerIdx]->GetNumOutputSlots(); ++slotIdx)
182  {
183  OutputSlot* outputSlot = &layerList[layerIdx]->GetOutputSlot(slotIdx);
184 
// One entry is recorded per qualifying connection, so an output slot with several external
// consumers contributes its TensorInfo several times.
185  for (unsigned int connectionIdx = 0; connectionIdx < outputSlot->GetNumConnections(); ++connectionIdx)
186  {
187  InputSlot* inputSlot = outputSlot->GetConnection(connectionIdx);
188  if (layerIdx < (maxIdx-1))
189  {
// For every layer but the last, only connections that leave the sequence are recorded.
190  if (&inputSlot->GetOwningLayer() != layerList[layerIdx+1])
191  {
192  outputInfos.push_back(outputSlot->GetTensorInfo());
193  numOutputs++;
194  }
195  }
// Last layer of the sequence: every connection is recorded. The pointer was already
// dereferenced in the loop condition above, so this null test cannot fail here.
196  else if (layerList[layerIdx] != nullptr)
197  {
198  outputInfos.push_back(outputSlot->GetTensorInfo());
199  numOutputs++;
200  }
201  }
202  }
203  }
204 
205  // Check the output count
206  bool validOutputCount = (outputInfos.size() > 0);
207  if (! validOutputCount)
208  {
209  return false;
210  }
211 
212  return true;
213 }
214 
215 }
armnn::BinaryOperation::Mul
@ Mul
armnn::ActivationDescriptor
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
armnn::BinaryOperation::Add
@ Add
armnn::InputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:53
armnn::OutputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Definition: Layer.cpp:92
armnn::CollapseLeadingUnitDimensions
bool CollapseLeadingUnitDimensions(const TensorInfo &in, TensorInfo &out)
Definition: NeonBackendOptimizationUtils.hpp:14
armnn::OutputSlot
Definition: Layer.hpp:100
armnn::OutputSlot::SetTensorInfo
void SetTensorInfo(const TensorInfo &tensorInfo) override
Definition: Layer.cpp:87
armnn::BuildAddMulAddSlotLists
void BuildAddMulAddSlotLists(bool handleReLu, bool multipleOutputs, std::vector< SlotListType > &inputLayersSlotLists, std::vector< SlotListType > &outputLayersSlotLists)
Definition: NeonBackendOptimizationUtils.hpp:36
armnn::TensorInfo
Definition: Tensor.hpp:152
armnn::IsSequenceLayerType
bool IsSequenceLayerType(Layer &layer, LayerType type)
Definition: SubgraphUtils.hpp:362
armnn::GetFusedName
void GetFusedName(Layer *layerList[4], std::string &fusedName)
Definition: NeonBackendOptimizationUtils.hpp:71
armnn::TensorInfo::GetNumDimensions
unsigned int GetNumDimensions() const
Definition: Tensor.hpp:197
armnn::ActivationFunction::BoundedReLu
@ BoundedReLu
min(a, max(b, input)) ReLu1 & ReLu6.
armnn::Layer
Definition: Layer.hpp:230
armnn::InputSlot::GetTensorInfo
const TensorInfo & GetTensorInfo() const override
Gets the TensorInfo for this InputSlot.
Definition: Layer.cpp:592
armnn::OutputSlot::GetOwningLayer
Layer & GetOwningLayer() const
Definition: Layer.hpp:132
ARMNN_THROW_INVALIDARG_IF_FALSE
#define ARMNN_THROW_INVALIDARG_IF_FALSE(_cond)
Definition: Exceptions.hpp:212
armnn::OutputSlot::GetNumConnections
unsigned int GetNumConnections() const override
Definition: Layer.hpp:158
armnn::ActivationDescriptor::m_Function
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu,...
Definition: Descriptors.hpp:59
armnn::BuildAddMulAddTensorInfoLists
bool BuildAddMulAddTensorInfoLists(Type *layerList[4], unsigned int &numInputs, unsigned int &numOutputs, std::vector< TensorInfo > &inputInfos, std::vector< TensorInfo > &outputInfos, const ActivationDescriptor *&activationDescriptor, bool &fuseReLu)
Definition: NeonBackendOptimizationUtils.hpp:87
armnn::Layer::GetNameStr
const std::string & GetNameStr() const
Definition: Layer.hpp:240
armnn::InputSlot
Definition: Layer.hpp:42
armnn::Layer::GetType
LayerType GetType() const override
Returns the armnn::LayerType of this layer.
Definition: Layer.hpp:286
armnn::TensorInfo::GetShape
const TensorShape & GetShape() const
Definition: Tensor.hpp:193
armnn::ConstantLayer::m_LayerOutput
std::shared_ptr< ConstTensorHandle > m_LayerOutput
Definition: ConstantLayer.hpp:46
armnn::ActivationFunction::ReLu
@ ReLu
armnn::InputSlot::GetConnectedOutputSlot
const OutputSlot * GetConnectedOutputSlot() const
Definition: Layer.hpp:56
armnn::ConstantLayer
A layer that the constant data can be bound to.
Definition: ConstantLayer.hpp:15
armnn::TensorInfo::SetShape
void SetShape(const TensorShape &newShape)
Definition: Tensor.hpp:195
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::ConstTensor
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:329
armnn::OutputSlot::GetConnection
const InputSlot * GetConnection(unsigned int index) const override
Definition: Layer.cpp:75
ArmComputeSubgraphUtils.hpp
armnn::LayerType::Constant
@ Constant