ArmNN 22.02
FuseActivationTests.cpp
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LayersFwd.hpp"

#include <Network.hpp>
#include <ResolveType.hpp>
#include <armnn/INetwork.hpp>
#include <GraphUtils.hpp>
#include <TestUtils.hpp>

#include <doctest/doctest.h>

#include <QuantizeHelper.hpp> // armnnUtils::QuantizedVector
#include <iostream>           // std::cerr in FuseActivationSimpleTest
#include <string>
using namespace armnn;

namespace
{

// Fills a vector of T with `size` values starting at `initial` and increasing
// by `increment` per element. (For size <= 1 the loop is skipped and the
// vector stays value-initialized.)
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<float> typeVector(size, initial);
    std::vector<T> vector(size);

    if (size > 1)
    {
        for (unsigned int i = 0; i < size; ++i)
        {
            vector[i] = T(initial + (increment * static_cast<float>(i)));
        }
    }
    return vector;
}

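// Each receiver-layer test struct below exposes a common compile-time interface:
// the layer type an activation may be fused into (LayerType), whether the layer
// is element-wise (takes two inputs), whether weights are supplied as constant
// layers (isConstTensorAsInputSupported), the tensor shapes and element counts
// involved, and helpers to add the layer (AddReceiverLayer) and any constant
// inputs (AddConstantLayers) to a network.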
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct Convolution2dTest
{
    using LayerType = Convolution2dLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 4}); } // NHWCout
    static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        Convolution2dDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;
        descriptor.m_StrideX    = 1;
        descriptor.m_StrideY    = 1;

        std::vector<float> weightsData = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                          31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DWConvolution2dTest
{
public:
    using LayerType = DepthwiseConvolution2dLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); }  // [N,H,W,Cin]
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
    static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]

    constexpr static const unsigned int inputSize  = 48;  // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 108; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        DepthwiseConvolution2dDescriptor descriptor;
        descriptor.m_BiasEnabled = false;
        descriptor.m_DataLayout  = DataLayout::NHWC;
        descriptor.m_StrideX     = 1;
        descriptor.m_StrideY     = 1;

        std::vector<float> weightsData = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                          31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct FullyConnectedTest
{
public:
    using LayerType = FullyConnectedLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = true;

    static TensorShape GetInputShape()   { return TensorShape( {2, 5, 1, 1}); } // NCinHW
    static TensorShape GetOutputShape()  { return TensorShape( {2, 3}); }       // NCout
    static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); }       // CinCout

    constexpr static const unsigned int inputSize  = 10; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 6;  // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        FullyConnectedDescriptor descriptor;
        descriptor.m_BiasEnabled = false;

        return network->AddFullyConnectedLayer(descriptor, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        std::vector<float> weightsData = {  1,  2,  3,  4,  5,
                                            6,  7,  8,  9, 10,
                                           11, 12, 13, 14, 15};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
        ConstTensor weights(weightsInfo, weightsVector);

        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);

        std::vector<IConnectableLayer*> layers = { weightsLayer };
        return layers;
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct BatchNormTest
{
public:
    using LayerType = BatchNormalizationLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        BatchNormalizationDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;

        std::vector<T> betaVector     = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
        std::vector<T> gammaVector    = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
        std::vector<T> meanVector     = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
        std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);

        const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
        ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), betaVector);
        ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), gammaVector);
        ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), meanVector);
        ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), varianceVector);

        return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct MultiplicationTest
{
    using LayerType = MultiplicationLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddMultiplicationLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct AdditionTest
{
    using LayerType = AdditionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddAdditionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct SubtractionTest
{
    using LayerType = SubtractionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddSubtractionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DivisionTest
{
    using LayerType = DivisionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddDivisionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

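// Builds the test graph: Input -> receiver layer -> Activation -> Output.
// Element-wise receivers get the input connected to both of their input slots.
// When preventFusing is true, the receiver's output also feeds a second Output
// layer, which stops the optimizer from folding the activation into the receiver.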
template<typename LayerTest,
         DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
                          float scale, int32_t offset)
{
    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
                                                                   "receiverLayer",
                                                                   scale,
                                                                   offset);

    IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                     "activation");

    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
                                                                                      scale,
                                                                                      offset);

        // Connect constant layers to receiverLayer.
        for (unsigned int i = 0; i < constantLayers.size(); ++i)
        {
            constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
        }
    }

    // Define layers information
    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);

    // Set layer information
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
    receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (LayerTest::isElementWise)
    {
        inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
    }
    if (preventFusing)
    {
        receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}

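// Optimizes a fusable network and checks that the resulting graph contains the
// receiver layer renamed "fused-activation-into-receiverLayer" with no separate
// Activation layer; then optimizes and runs an equivalent network where fusion
// is prevented, and verifies that both runs produce the same outputs within the
// given tolerance.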
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
                                         float scale = 1.f, int32_t offset = 0)
{
    // FIRST NETWORK: Fused
    // Construct ArmNN network
    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

    Graph& graphFused = GetGraphForTesting(optNetFused.get());

    auto checkFusedConv2d = [](const Layer* const layer)->bool {
        return IsLayerOfType<LayerType>(layer) &&
               (layer->GetNameStr() == "fused-activation-into-receiverLayer");
    };

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(4 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));

        // Check that the new constant layer is connected to the fused receiver layer.
        Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
        CHECK(fusedReceiverLayer);
        CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
    }
    else
    {
        CHECK(3 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifier;
    CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

    // Create structures for inputs and outputs.
    std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataFused(LayerTest::outputSize);

    armnn::TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
    inputTensorInfo.SetConstant(true);

    InputTensors inputTensorsFused{
        {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
    OutputTensors outputTensorsFused{
        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

    // Execute network
    CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);

    // SECOND NETWORK: NotFused
    // Construct ArmNN network
    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());

    Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(6 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }
    else
    {
        CHECK(5 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifierNotFused;
    CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);

    // Create structures for inputs and outputs.
    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataNotFused(LayerTest::outputSize);
    std::vector<T> outputData2NotFused(LayerTest::outputSize);

    TensorInfo inputTensorInfoNotFused = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
    inputTensorInfoNotFused.SetConstant(true);

    InputTensors inputTensorsNotFused{
        {0, ConstTensor(inputTensorInfoNotFused, inputDataNotFused.data())}};
    OutputTensors outputTensorsNotFused{
        {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
        {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};

    // Execute network
    CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
          == Status::Success);

    // Check that the output of the fused activation matches the output of the activation in the "NotFused" network.
    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
    {
        auto outputNotFused = static_cast<float>(outputDataNotFused[n]);
        CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
    }
}

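// Smoke test: builds, optimizes, loads, and runs one receiver + activation
// network on the given backend, returning false if any step throws. It does not
// verify that fusion actually happened or compare results against a reference.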
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
                              float scale = 1.f, int32_t offset = 0)
{
    bool success;
    try
    {
        // Construct ArmNN network
        INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

        // Create ArmNN runtime
        IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

        // Optimise ArmNN network
        IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

        // Load network into runtime
        NetworkId networkIdentifier;
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

        // Create structures for inputs and outputs.
        std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
        std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
        std::vector<T> outputDataFused(LayerTest::outputSize);

        TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
        inputTensorInfo.SetConstant(true);

        InputTensors inputTensorsFused{
            {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
        OutputTensors outputTensorsFused{
            {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);

        success = true;
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        success = false;
    }

    return success;
}

} // anonymous namespace
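// Backend-specific suites: CpuAcc (Neon) below, GpuAcc (CL) further down. Each
// TEST_CASE pairs one receiver layer and data type with one activation function.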

#if defined(ARMCOMPUTENEON_ENABLED)
TEST_SUITE("Optimizer")
{
// ReLu fused into Receiver Layers Float32
TEST_CASE("FuseReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers Float32
TEST_CASE("FuseBoundedReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// ReLU fused into Receiver Layers QAsymmU8
TEST_CASE("FuseReLUIntoConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers QAsymmS8
TEST_CASE("FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// TanH fused into Receiver Layers Float32
TEST_CASE("FuseTanHIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// HardSwish fused into Receiver Layers Float32
TEST_CASE("FuseHardSwishIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// Test that all receiver layers followed by all activation layers work, either fused or not fused
TEST_CASE("LayerFollowedByActivationFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
TEST_CASE("LayerFollowedByActivationFloat16CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
TEST_CASE("LayerFollowedByActivationQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;

    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::TanH;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::ReLu;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::HardSwish;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
}
}
#endif

867 #if defined(ARMCOMPUTECL_ENABLED)
868 TEST_SUITE("Optimizer")
869 {
870 // ReLu fused into Receiver Layers Float32
871 TEST_CASE("FuseReLUIntoConvFloat32GpuAccTest")
872 {
873  ActivationDescriptor activationDescriptor;
874  activationDescriptor.m_Function = ActivationFunction::ReLu;
875 
876  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
877  (activationDescriptor, 0.0001f, Compute::GpuAcc);
878 }
879 TEST_CASE("FuseReLUIntoDWConvFloat32GpuAccTest")
880 {
881  ActivationDescriptor activationDescriptor;
882  activationDescriptor.m_Function = ActivationFunction::ReLu;
883 
884  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
885  (activationDescriptor, 0.0001f, Compute::GpuAcc);
886 }
887 TEST_CASE("FuseReLUIntoFullyConnectedFloat32GpuAccTest")
888 {
889  ActivationDescriptor activationDescriptor;
890  activationDescriptor.m_Function = ActivationFunction::ReLu;
891 
892  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
893  (activationDescriptor, 0.0001f, Compute::GpuAcc);
894 }
895 TEST_CASE("FuseReLUIntoBatchNormFloat32GpuAccTest")
896 {
897  ActivationDescriptor activationDescriptor;
898  activationDescriptor.m_Function = ActivationFunction::ReLu;
899 
900  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
901  (activationDescriptor, 0.0001f, Compute::GpuAcc);
902 }
903 TEST_CASE("FuseReLUIntoMulFloat32GpuAccTest")
904 {
905  ActivationDescriptor activationDescriptor;
906  activationDescriptor.m_Function = ActivationFunction::ReLu;
907 
908  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
909  (activationDescriptor, 0.0001f, Compute::GpuAcc);
910 }
911 TEST_CASE("FuseReLUIntoAddFloat32GpuAccTest")
912 {
913  ActivationDescriptor activationDescriptor;
914  activationDescriptor.m_Function = ActivationFunction::ReLu;
915 
916  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
917  (activationDescriptor, 0.0001f, Compute::GpuAcc);
918 }
919 TEST_CASE("FuseReLUIntoSubFloat32GpuAccTest")
920 {
921  ActivationDescriptor activationDescriptor;
922  activationDescriptor.m_Function = ActivationFunction::ReLu;
923 
924  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
925  (activationDescriptor, 0.0001f, Compute::GpuAcc);
926 }
927 TEST_CASE("FuseReLUIntoDivFloat32GpuAccTest")
928 {
929  ActivationDescriptor activationDescriptor;
930  activationDescriptor.m_Function = ActivationFunction::ReLu;
931 
932  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
933  (activationDescriptor, 0.0001f, Compute::GpuAcc);
934 }
935 
936 // BoundedReLu fused into Receiver Layers Float32
937 TEST_CASE("FuseBoundedReLUIntoConvFloat32GpuAccTest")
938 {
939  ActivationDescriptor activationDescriptor;
940  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
941  activationDescriptor.m_A = 1.0f;
942  activationDescriptor.m_B = -1.0f;
943 
944  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
945  (activationDescriptor, 0.0001f, Compute::GpuAcc);
946 }
947 TEST_CASE("FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
948 {
949  ActivationDescriptor activationDescriptor;
950  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
951  activationDescriptor.m_A = 1.0f;
952  activationDescriptor.m_B = -1.0f;
953 
954  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
955  (activationDescriptor, 0.0001f, Compute::GpuAcc);
956 }
957 TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
958 {
959  ActivationDescriptor activationDescriptor;
960  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
961  activationDescriptor.m_A = 1.0f;
962  activationDescriptor.m_B = -1.0f;
963 
964  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
965  (activationDescriptor, 0.0001f, Compute::GpuAcc);
966 }
967 TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
968 {
969  ActivationDescriptor activationDescriptor;
970  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
971  activationDescriptor.m_A = 1.0f;
972  activationDescriptor.m_B = -1.0f;
973 
974  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
975  (activationDescriptor, 0.0001f, Compute::GpuAcc);
976 }
977 TEST_CASE("FuseBoundedReLUIntoMulFloat32GpuAccTest")
978 {
979  ActivationDescriptor activationDescriptor;
980  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
981  activationDescriptor.m_A = 1.0f;
982  activationDescriptor.m_B = -1.0f;
983 
984  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
985  (activationDescriptor, 0.0001f, Compute::GpuAcc);
986 }
987 TEST_CASE("FuseBoundedReLUIntoAddFloat32GpuAccTest")
988 {
989  ActivationDescriptor activationDescriptor;
990  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
991  activationDescriptor.m_A = 1.0f;
992  activationDescriptor.m_B = -1.0f;
993 
994  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
995  (activationDescriptor, 0.0001f, Compute::GpuAcc);
996 }
997 TEST_CASE("FuseBoundedReLUIntoSubFloat32GpuAccTest")
998 {
999  ActivationDescriptor activationDescriptor;
1000  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1001  activationDescriptor.m_A = 1.0f;
1002  activationDescriptor.m_B = -1.0f;
1003 
1004  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1005  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1006 }
1007 TEST_CASE("FuseBoundedReLUIntoDivFloat32GpuAccTest")
1008 {
1009  ActivationDescriptor activationDescriptor;
1010  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1011  activationDescriptor.m_A = 1.0f;
1012  activationDescriptor.m_B = -1.0f;
1013 
1014  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1015  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1016 }
1017 
1018 // ReLu fused into Receiver Layers Float16
1019 TEST_CASE("FuseReLUIntoConvFloat16GpuAccTest")
1020 {
1021  ActivationDescriptor activationDescriptor;
1022  activationDescriptor.m_Function = ActivationFunction::ReLu;
1023 
1024  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1025  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1026 }
1027 TEST_CASE("FuseReLUIntoDWConvFloat16GpuAccTest")
1028 {
1029  ActivationDescriptor activationDescriptor;
1030  activationDescriptor.m_Function = ActivationFunction::ReLu;
1031 
1032  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1033  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1034 }
1035 TEST_CASE("FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1036 {
1037  ActivationDescriptor activationDescriptor;
1038  activationDescriptor.m_Function = ActivationFunction::ReLu;
1039 
1040  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1041  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1042 }
1043 TEST_CASE("FuseReLUIntoBatchNormFloat16GpuAccTest")
1044 {
1045  ActivationDescriptor activationDescriptor;
1046  activationDescriptor.m_Function = ActivationFunction::ReLu;
1047 
1048  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1049  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1050 }
1051 TEST_CASE("FuseReLUIntoMulFloat16GpuAccTest")
1052 {
1053  ActivationDescriptor activationDescriptor;
1054  activationDescriptor.m_Function = ActivationFunction::ReLu;
1055 
1056  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1057  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1058 }
1059 TEST_CASE("FuseReLUIntoAddFloat16GpuAccTest")
1060 {
1061  ActivationDescriptor activationDescriptor;
1062  activationDescriptor.m_Function = ActivationFunction::ReLu;
1063 
1064  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
1065  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1066 }
1067 TEST_CASE("FuseReLUIntoSubFloat16GpuAccTest")
1068 {
1069  ActivationDescriptor activationDescriptor;
1070  activationDescriptor.m_Function = ActivationFunction::ReLu;
1071 
1072  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1073  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1074 }
1075 TEST_CASE("FuseReLUIntoDivFloat16GpuAccTest")
1076 {
1077  ActivationDescriptor activationDescriptor;
1078  activationDescriptor.m_Function = ActivationFunction::ReLu;
1079 
1080  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
1081  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1082 }
1083 
1084 // ReLU fused into Receiver Layers QAsymmU8
1085 TEST_CASE("FuseReLUQIntoConvAsymmU8GpuAccTest")
1086 {
1087  ActivationDescriptor activationDescriptor;
1088  activationDescriptor.m_Function = ActivationFunction::ReLu;
1089 
1090  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1091  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1092 }
1093 TEST_CASE("FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1094 {
1095  ActivationDescriptor activationDescriptor;
1096  activationDescriptor.m_Function = ActivationFunction::ReLu;
1097 
1098  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1099  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1100 }
1101 TEST_CASE("FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1102 {
1103  ActivationDescriptor activationDescriptor;
1104  activationDescriptor.m_Function = ActivationFunction::ReLu;
1105 
1106  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1107  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1108 }
1109 
1110 // BoundedReLu fused into Receiver Layers QAsymmS8
1111 TEST_CASE("FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1112 {
1113  ActivationDescriptor activationDescriptor;
1114  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1115  activationDescriptor.m_A = 6.0f;
1116  activationDescriptor.m_B = 0.0f;
1117 
1118  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1119  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1120 }
1121 TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1122 {
1123  ActivationDescriptor activationDescriptor;
1124  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1125  activationDescriptor.m_A = 6.0f;
1126  activationDescriptor.m_B = 0.0f;
1127 
1128  FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
1129  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1130 }
1131 TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1132 {
1133  ActivationDescriptor activationDescriptor;
1134  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1135  activationDescriptor.m_A = 6.0f;
1136  activationDescriptor.m_B = 0.0f;
1137 
1138  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1139  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1140 }
1141 
1142 // TanH fused into Receiver Layers Float32
1143 TEST_CASE("FuseTanHIntoConvFloat32GpuAccTest")
1144 {
1145  ActivationDescriptor activationDescriptor;
1146  activationDescriptor.m_Function = ActivationFunction::TanH;
1147 
1148  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1149  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1150 }
1151 TEST_CASE("FuseTanHIntoMulFloat32GpuAccTest")
1152 {
1153  ActivationDescriptor activationDescriptor;
1154  activationDescriptor.m_Function = ActivationFunction::TanH;
1155 
1156  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1157  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1158 }
1159 TEST_CASE("FuseTanHIntoAddFloat32GpuAccTest")
1160 {
1161  ActivationDescriptor activationDescriptor;
1162  activationDescriptor.m_Function = ActivationFunction::TanH;
1163 
1164  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1165  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1166 }
1167 TEST_CASE("FuseTanHIntoSubFloat32GpuAccTest")
1168 {
1169  ActivationDescriptor activationDescriptor;
1170  activationDescriptor.m_Function = ActivationFunction::TanH;
1171 
1172  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1173  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1174 }
1175 TEST_CASE("FuseTanHIntoDivFloat32GpuAccTest")
1176 {
1177  ActivationDescriptor activationDescriptor;
1178  activationDescriptor.m_Function = ActivationFunction::TanH;
1179 
1180  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1181  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1182 }
1183 
1184 // HardSwish fused into Receiver Layers Float32
1185 TEST_CASE("FuseHardSwishIntoConvFloat32GpuAccTest")
1186 {
1187  ActivationDescriptor activationDescriptor;
1188  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1189 
1190  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1191  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1192 }
1193 TEST_CASE("FuseHardSwishIntoMulFloat32GpuAccTest")
1194 {
1195  ActivationDescriptor activationDescriptor;
1196  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1197 
1198  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1199  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1200 }
1201 TEST_CASE("FuseHardSwishIntoAddFloat32GpuAccTest")
1202 {
1203  ActivationDescriptor activationDescriptor;
1204  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1205 
1206  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1207  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1208 }
1209 TEST_CASE("FuseHardSwishIntoSubFloat32GpuAccTest")
1210 {
1211  ActivationDescriptor activationDescriptor;
1212  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1213 
1214  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1215  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1216 }
1217 TEST_CASE("FuseHardSwishIntoDivFloat32GpuAccTest")
1218 {
1219  ActivationDescriptor activationDescriptor;
1220  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1221 
1222  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1223  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1224 }
1225 
1226 // Test that all receiver layers follow by all activation layers work, either fused or not fused
1227 TEST_CASE("LayerFollowedByActivationFloat32GpuAccTest")
1228 {
1229  ActivationDescriptor activationDescriptor;
1230  for (int i = 0; i != 12; ++i)
1231  {
1232  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1233  activationDescriptor.m_A = 1.0f;
1234  activationDescriptor.m_B = -1.0f;
1235  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1236  {
1237  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1238  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1239  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
1240  (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
1241  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
1242  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1243  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
1244  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1245  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1246  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1247  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
1248  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1249  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1250  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1251  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
1252  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1253  }
1254  }
1255 }
1256 TEST_CASE("LayerFollowedByActivationFloat16GpuAccTest")
1257 {
1258  ActivationDescriptor activationDescriptor;
1259  for (int i = 0; i != 12; ++i)
1260  {
1261  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1262  activationDescriptor.m_A = 1.0f;
1263  activationDescriptor.m_B = -1.0f;
1264  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1265  {
1266  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1267  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1268  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1269  (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
1270  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1271  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1272  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1273  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1274  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1275  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1276  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
1277  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1278  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1279  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1280  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
1281  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1282  }
1283  }
1284 }
1285 TEST_CASE("LayerFollowedByActivationQAsymmU8GpuAccTest")
1286 {
1287  ActivationDescriptor activationDescriptor;
1288 
1289  activationDescriptor.m_Function = ActivationFunction::Sigmoid;
1290  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1291  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
1292  static_cast<int>(activationDescriptor.m_Function));
1293  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1294  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
1295  static_cast<int>(activationDescriptor.m_Function));
1296 
1297  activationDescriptor.m_Function = ActivationFunction::TanH;
1298  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1299  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
1300  static_cast<int>(activationDescriptor.m_Function));
1301  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1302  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
1303  static_cast<int>(activationDescriptor.m_Function));
1304 
1305  activationDescriptor.m_Function = ActivationFunction::ReLu;
1306  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1307  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1308  static_cast<int>(activationDescriptor.m_Function));
1309  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1310  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1311  static_cast<int>(activationDescriptor.m_Function));
1312 
1313  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1314  activationDescriptor.m_A = 1.0f;
1315  activationDescriptor.m_B = -1.0f;
1316  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1317  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1318  static_cast<int>(activationDescriptor.m_Function));
1319  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1320  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1321  static_cast<int>(activationDescriptor.m_Function));
1322 
1323  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1324  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1325  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1326  static_cast<int>(activationDescriptor.m_Function));
1327  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1328  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1329  static_cast<int>(activationDescriptor.m_Function));
1330 }
1331 }
1332 #endif
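
Every TEST_CASE above follows the same pattern: build a small network in which a receiver layer (Convolution2d, FullyConnected, an element-wise layer, and so on) feeds an activation layer, optimize the network for the backend under test, and check that the optimizer replaced the pair with a single fused layer. Below is a minimal sketch of that flow, assuming the Convolution2dTest helper defined earlier in this file and public ArmNN 22.02 APIs; the real FuseActivation*Test helpers additionally set tensor infos and quantization parameters on every slot and compare inference results before and after fusion.

// Minimal sketch, not a verbatim excerpt from the test helpers above.
using namespace armnn;

INetworkPtr network = INetwork::Create();

ActivationDescriptor activationDescriptor;
activationDescriptor.m_Function = ActivationFunction::ReLu;

IConnectableLayer* inputLayer    = network->AddInputLayer(0);
IConnectableLayer* receiverLayer =
    Convolution2dTest<DataType::Float32>::AddReceiverLayer(network.get(), "receiverLayer");
IConnectableLayer* activationLayer =
    network->AddActivationLayer(activationDescriptor, "activation");
IConnectableLayer* outputLayer   = network->AddOutputLayer(0);

inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

// A TensorInfo must be set on every output slot before optimization; the
// helpers derive these from GetInputShape()/GetOutputShape() (omitted here).

IRuntime::CreationOptions options;
IRuntimePtr runtime = IRuntime::Create(options);
IOptimizedNetworkPtr optNet =
    Optimize(*network, { Compute::GpuAcc }, runtime->GetDeviceSpec());

// Expect input -> fused receiver/activation -> output, i.e. three layers.
// The tests locate the fused layer by name with GetFirstLayerWithName() and
// verify the remaining layer sequence with CheckSequence().
Graph& graphFused = GetGraphForTesting(optNet.get());
CHECK(graphFused.GetNumLayers() == 3);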
TEST_SUITE("TestConstTensorLayerVisitor")
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
Definition: Network.cpp:358
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:40
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
bool m_BiasEnabled
Enable/disable bias.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
Definition: TestUtils.hpp:21
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
Definition: GraphUtils.cpp:22
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
Definition: Network.cpp:313
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
Definition: Network.cpp:130
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:31
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
Main network class which provides the interface for building up a neural network. ...
Definition: INetwork.hpp:249
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)
Adds a 2D convolution layer to the network.
Definition: Network.cpp:85
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
Definition: Network.cpp:353
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
const IOutputSlot * GetConnection() const override
Definition: Layer.hpp:204
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
Definition: Network.cpp:168
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, bool constantWeights)
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:321
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
DataType
Definition: Types.hpp:35
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1680
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
int NetworkId
Definition: IRuntime.hpp:25
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
min(a, max(b, input)) ReLu1 & ReLu6.
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
Definition: Network.cpp:268
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
Definition: TestUtils.cpp:47
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
Definition: Descriptors.hpp:61
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
Definition: Network.cpp:258
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:516
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
Definition: Network.cpp:263
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:492
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:63
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:59
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
ActivationFunction
Definition: Types.hpp:73
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:458