ArmNN
 22.05
FuseActivationTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LayersFwd.hpp"
7 
8 #include <Network.hpp>
9 #include <ResolveType.hpp>
10 #include <armnn/INetwork.hpp>
11 #include <GraphUtils.hpp>
12 #include <TestUtils.hpp>
13 
14 #include <doctest/doctest.h>
15 
17 #include <string>
18 
19 using namespace armnn;
20 
21 namespace
22 {
23 
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    // Builds the ramp { initial, initial + increment, initial + 2*increment, ... }
    // converted element-wise to T.
    //
    // Fixes vs. previous version:
    //  - removed the unused 'std::vector<float> typeVector' allocation;
    //  - removed the 'size > 1' guard, which left a size-1 vector value-initialised
    //    (T{} == 0) instead of containing 'initial'. The loop now covers all sizes
    //    (size == 0 simply returns an empty vector).
    std::vector<T> vector(size);

    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}
39 
40 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
41 struct Convolution2dTest
42 {
44  static const bool isElementWise = false;
45  static const bool isConstTensorAsInputSupported = true;
46 
47  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
48  static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout
49  static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin
50 
51  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
52  constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
53 
54  static IConnectableLayer* AddReceiverLayer(INetwork* network,
55  const char* name,
56  float scale = 1.f,
57  int32_t offset = 0)
58  {
59  IgnoreUnused(scale);
60  IgnoreUnused(offset);
61 
62  Convolution2dDescriptor descriptor;
63  descriptor.m_DataLayout = DataLayout::NHWC;
64  descriptor.m_StrideX = 1;
65  descriptor.m_StrideY = 1;
66 
67  return network->AddConvolution2dLayer(descriptor, name);
68  }
69 
70  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
71  float scale = 1.f,
72  int32_t offset = 0)
73  {
74 
75  std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
76  11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
77  21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
78  31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
79  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
80  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
81  ConstTensor weights(weightsInfo, weightsVector);
82 
83  IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
84  weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
85 
86  std::vector<IConnectableLayer*> layers = { weightsLayer };
87  return layers;
88  }
89 };
90 
91 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
92 struct DWConvolution2dTest
93 {
94 public:
96  static const bool isElementWise = false;
97  static const bool isConstTensorAsInputSupported = true;
98 
99  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // [N,H,W,Cin]
100  static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
101  static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]
102 
103  constexpr static const unsigned int inputSize = 48; //batchIn * heightIn * widthIn * channelIn;
104  constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
105 
106  static IConnectableLayer* AddReceiverLayer(INetwork* network,
107  const char* name,
108  float scale = 1.f,
109  int32_t offset = 0)
110  {
111  IgnoreUnused(scale);
112  IgnoreUnused(offset);
113 
115  descriptor.m_BiasEnabled = false;
116  descriptor.m_DataLayout = DataLayout::NHWC;
117  descriptor.m_StrideX = 1;
118  descriptor.m_StrideY = 1;
119 
120  return network->AddDepthwiseConvolution2dLayer(descriptor, name);
121  }
122 
123  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
124  float scale = 1.f,
125  int32_t offset = 0)
126  {
127  std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
128  11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
129  21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
130  31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
131  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
132  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
133  ConstTensor weights(weightsInfo, weightsVector);
134 
135  IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
136  weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
137 
138  std::vector<IConnectableLayer*> layers = { weightsLayer };
139  return layers;
140  }
141 };
142 
143 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
144 struct FullyConnectedTest
145 {
146 public:
148  static const bool isElementWise = false;
149  static const bool isConstTensorAsInputSupported = true;
150 
151  static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW
152  static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout
153  static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); } // CinCout
154 
155  constexpr static const unsigned int inputSize = 10; // batchIn * heightIn * widthIn * channelIn
156  constexpr static const unsigned int outputSize = 6; // batchOut * heightOut * widthOut * channelOut
157 
158  static IConnectableLayer* AddReceiverLayer(INetwork* network,
159  const char* name,
160  float scale = 1.f,
161  int32_t offset = 0)
162  {
163  IgnoreUnused(scale);
164  IgnoreUnused(offset);
165 
166  FullyConnectedDescriptor descriptor;
167  descriptor.m_BiasEnabled = false;
168 
169  return network->AddFullyConnectedLayer(descriptor, name);
170  }
171 
172  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
173  float scale = 1.f,
174  int32_t offset = 0)
175  {
176  std::vector<float> weightsData = { 1, 2, 3, 4, 5,
177  6, 7, 8, 9, 10,
178  11, 12, 13, 14, 15};
179  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
180  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
181  ConstTensor weights(weightsInfo, weightsVector);
182 
183  IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
184  weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);
185 
186  std::vector<IConnectableLayer*> layers = { weightsLayer };
187  return layers;
188  }
189 };
190 
191 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
192 struct BatchNormTest
193 {
194 public:
196  static const bool isElementWise = false;
197  static const bool isConstTensorAsInputSupported = false;
198 
199  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
200  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
201 
202  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
203  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
204 
205  static IConnectableLayer* AddReceiverLayer(INetwork* network,
206  const char* name,
207  float scale = 1.f,
208  int32_t offset = 0)
209  {
210  IgnoreUnused(scale);
211  IgnoreUnused(offset);
212 
213  BatchNormalizationDescriptor descriptor;
214  descriptor.m_DataLayout = DataLayout::NHWC;
215 
216  std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
217  std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
218  std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
219  std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
220 
221  const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
222  ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), betaVector);
223  ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), gammaVector);
224  ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), meanVector);
225  ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType, 0.0f, 0, true), varianceVector);
226 
227  return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
228  }
229 
230  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
231  float scale = 1.f,
232  int32_t offset = 0)
233  {
234  IgnoreUnused(network);
235  IgnoreUnused(scale);
236  IgnoreUnused(offset);
237  return {};
238  }
239 };
240 
241 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
242 struct MultiplicationTest
243 {
245  static const bool isElementWise = true;
246  static const bool isConstTensorAsInputSupported = false;
247 
248  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
249  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
250 
251  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
252  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
253 
254  static IConnectableLayer* AddReceiverLayer(INetwork* network,
255  const char* name,
256  float scale = 1.f,
257  int32_t offset = 0)
258  {
259  IgnoreUnused(scale);
260  IgnoreUnused(offset);
261 
262  return network->AddMultiplicationLayer(name);
263  }
264 
265  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
266  float scale = 1.f,
267  int32_t offset = 0)
268  {
269  IgnoreUnused(network);
270  IgnoreUnused(scale);
271  IgnoreUnused(offset);
272  return {};
273  }
274 };
275 
276 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
277 struct AdditionTest
278 {
279  using LayerType = AdditionLayer;
280  static const bool isElementWise = true;
281  static const bool isConstTensorAsInputSupported = false;
282 
283  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
284  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
285 
286  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
287  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
288 
289  static IConnectableLayer* AddReceiverLayer(INetwork* network,
290  const char* name,
291  float scale = 1.f,
292  int32_t offset = 0)
293  {
294  IgnoreUnused(scale);
295  IgnoreUnused(offset);
296 
297  return network->AddAdditionLayer(name);
298  }
299 
300  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
301  float scale = 1.f,
302  int32_t offset = 0)
303  {
304  IgnoreUnused(network);
305  IgnoreUnused(scale);
306  IgnoreUnused(offset);
307  return {};
308  }
309 };
310 
311 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
312 struct SubtractionTest
313 {
314  using LayerType = SubtractionLayer;
315  static const bool isElementWise = true;
316  static const bool isConstTensorAsInputSupported = false;
317 
318  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
319  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
320 
321  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
322  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
323 
324  static IConnectableLayer* AddReceiverLayer(INetwork* network,
325  const char* name,
326  float scale = 1.f,
327  int32_t offset = 0)
328  {
329  IgnoreUnused(scale);
330  IgnoreUnused(offset);
331 
332  return network->AddSubtractionLayer(name);
333  }
334 
335  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
336  float scale = 1.f,
337  int32_t offset = 0)
338  {
339  IgnoreUnused(network);
340  IgnoreUnused(scale);
341  IgnoreUnused(offset);
342  return {};
343  }
344 };
345 
346 template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
347 struct DivisionTest
348 {
349  using LayerType = DivisionLayer;
350  static const bool isElementWise = true;
351  static const bool isConstTensorAsInputSupported = false;
352 
353  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
354  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
355 
356  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
357  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
358 
359  static IConnectableLayer* AddReceiverLayer(INetwork* network,
360  const char* name,
361  float scale = 1.f,
362  int32_t offset = 0)
363  {
364  IgnoreUnused(scale);
365  IgnoreUnused(offset);
366 
367  return network->AddDivisionLayer(name);
368  }
369 
370  static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
371  float scale = 1.f,
372  int32_t offset = 0)
373  {
374  IgnoreUnused(network);
375  IgnoreUnused(scale);
376  IgnoreUnused(offset);
377  return {};
378  }
379 };
380 
// Builds the test network: Input -> receiver -> Activation -> Output, where the
// receiver layer (conv / depthwise conv / FC / batchnorm / element-wise op) is
// supplied by LayerTest. When preventFusing is true, a second output is connected
// directly to the receiver's output, giving it two consumers so the optimizer
// cannot fuse the activation into it (used as the reference "not fused" network).
template<typename LayerTest,
         DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
                          float scale, int32_t offset)
{
    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
                                                                   "receiverLayer",
                                                                   scale,
                                                                   offset);

    IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                     "activation");

    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
    // Second output only exists in the "prevent fusing" variant (binding id 1).
    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;

    // If ConstTensorAsInputs is supported weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
                                                                                      scale,
                                                                                      offset);

        // Connect constant layers to receiverLayer; slot 0 is reserved for the
        // network input, so constants start at slot 1.
        for (unsigned int i = 0; i < constantLayers.size(); ++i)
        {
            constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
        }
    }

    // Define layers information
    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);

    // Set layer information
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
    receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (LayerTest::isElementWise)
    {
        // Element-wise receivers take the same tensor on both inputs.
        inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
    }
    if (preventFusing)
    {
        // Extra consumer of the receiver's output blocks the fuse optimization.
        receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}
441 
// End-to-end fusion check. Builds the network twice: once fuseable and once with
// fusing prevented (extra consumer of the receiver output). Verifies that the
// optimized fused graph has the expected layer sequence and name, runs both
// networks on 'backendId', and compares their outputs within 'tolerance'.
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
                                         float scale = 1.f, int32_t offset=0)
{
    // FIRST NETWORK: Fused
    // Construct ArmNN network
    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

    Graph& graphFused = GetGraphForTesting(optNetFused.get());

    // The optimizer renames the fused layer; match on both type and name.
    auto checkFusedConv2d = [](const Layer* const layer)->bool {
        return IsLayerOfType<LayerType>(layer) &&
               (layer->GetNameStr() == "fused-activation-into-receiverLayer");
    };

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if(LayerTest::isConstTensorAsInputSupported)
    {
        // Input + Constant(weights) + fused receiver + Output = 4 layers.
        CHECK(4 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));

        // Check if new constant layer is connected to fused receiver layer.
        Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
        CHECK(fusedReceiverLayer);
        CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
    }
    else
    {
        // Input + fused receiver + Output = 3 layers.
        CHECK(3 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifier;
    CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

    //Creates structures for inputs and outputs.
    std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataFused(LayerTest::outputSize);

    armnn::TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
    inputTensorInfo.SetConstant(true);

    InputTensors inputTensorsFused{
        {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
    OutputTensors outputTensorsFused{
        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

    // Execute network
    CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);

    // SECOND NETWORK: NotFused
    // Construct ArmNN network
    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());

    Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if(LayerTest::isConstTensorAsInputSupported)
    {
        // Receiver and activation stay separate; two outputs (ids 0 and 1).
        CHECK(6 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }
    else
    {
        CHECK(5 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifierNotFused;
    CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);

    //Creates structures for inputs and outputs.
    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataNotFused(LayerTest::outputSize);
    std::vector<T> outputData2NotFused(LayerTest::outputSize);

    TensorInfo inputTensorInfoNotFused = runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0);
    inputTensorInfoNotFused.SetConstant(true);

    InputTensors inputTensorsNotFused{
        {0, ConstTensor(inputTensorInfoNotFused, inputDataNotFused.data())}};
    OutputTensors outputTensorsNotFused{
        {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
        {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};

    // Execute network
    CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
          == Status::Success);

    // Check the output of the fused-activation matches with the output of the activation in the "NotFused" network
    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
    {
        auto outputNotFused = static_cast<float>(outputDataNotFused[n]);
        CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
    }
}
578 
579 template<typename LayerTest,
580  DataType ArmnnType,
581  typename LayerType = typename LayerTest::LayerType,
582  typename T = ResolveType<ArmnnType>>
583 bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
584  float scale = 1.f, int32_t offset = 0)
585 {
586  bool success;
587  try
588  {
589  // Construct ArmNN network
590  INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);
591 
592  // Create ArmNN runtime
593  IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
594 
595  // Optimise ArmNN network
596  IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
597 
598  // Load network into runtime
599  NetworkId networkIdentifier;
600  CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
601 
602  //Creates structures for inputs and outputs.
603  std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
604  std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
605  std::vector<T> outputDataFused(LayerTest::outputSize);
606 
607  TensorInfo inputTensorInfo = run->GetInputTensorInfo(networkIdentifier, 0);
608  inputTensorInfo.SetConstant(true);
609 
610  InputTensors inputTensorsFused{
611  {0, ConstTensor(inputTensorInfo, inputDataFused.data())}};
612  OutputTensors outputTensorsFused{
613  {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
614 
615  // Execute network
616  run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
617 
618  success = true;
619  }
620  catch (const std::exception& e)
621  {
622  std::cerr << e.what() << std::endl;
623  success = false;
624  }
625 
626  return success;
627 }
628 
629 }
630 
631 #if defined(ARMCOMPUTENEON_ENABLED)
632 TEST_SUITE("Optimizer")
633 {
634 // ReLu fused into Receiver Layers Float32
635 TEST_CASE("FuseReLUIntoConvFloat32CpuAccTest")
636 {
637  ActivationDescriptor activationDescriptor;
638  activationDescriptor.m_Function = ActivationFunction::ReLu;
639 
640  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
641  (activationDescriptor, 0.0001f, Compute::CpuAcc);
642 }
643 TEST_CASE("FuseReLUIntoDWConvFloat32CpuAccTest")
644 {
645  ActivationDescriptor activationDescriptor;
646  activationDescriptor.m_Function = ActivationFunction::ReLu;
647 
648  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
649  (activationDescriptor, 0.0001f, Compute::CpuAcc);
650 }
651 TEST_CASE("FuseReLUIntoFullyConnectedFloat32CpuAccTest")
652 {
653  ActivationDescriptor activationDescriptor;
654  activationDescriptor.m_Function = ActivationFunction::ReLu;
655 
656  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
657  (activationDescriptor, 0.0001f, Compute::CpuAcc);
658 }
659 TEST_CASE("FuseReLUIntoBatchNormFloat32CpuAccTest")
660 {
661  ActivationDescriptor activationDescriptor;
662  activationDescriptor.m_Function = ActivationFunction::ReLu;
663 
664  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
665  (activationDescriptor, 0.0001f, Compute::CpuAcc);
666 }
667 
668 // BoundedReLu fused into Receiver Layers Float32
669 TEST_CASE("FuseBoundedReLUIntoConvFloat32CpuAccTest")
670 {
671  ActivationDescriptor activationDescriptor;
672  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
673  activationDescriptor.m_A = 1.0f;
674  activationDescriptor.m_B = -1.0f;
675 
676  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
677  (activationDescriptor, 0.0001f, Compute::CpuAcc);
678 }
679 TEST_CASE("FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
680 {
681  ActivationDescriptor activationDescriptor;
682  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
683  activationDescriptor.m_A = 1.0f;
684  activationDescriptor.m_B = -1.0f;
685 
686  FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::Float32 > , DataType::Float32 >
687  (activationDescriptor, 0.0001f, Compute::CpuAcc);
688 }
689 TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
690 {
691  ActivationDescriptor activationDescriptor;
692  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
693  activationDescriptor.m_A = 1.0f;
694  activationDescriptor.m_B = -1.0f;
695 
696  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
697  (activationDescriptor, 0.0001f, Compute::CpuAcc);
698 }
699 TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
700 {
701  ActivationDescriptor activationDescriptor;
702  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
703  activationDescriptor.m_A = 1.0f;
704  activationDescriptor.m_B = -1.0f;
705 
706  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
707  (activationDescriptor, 0.0001f, Compute::CpuAcc);
708 }
709 
710 // ReLU fused into Receiver Layers QAsymmU8
711 TEST_CASE("FuseReLUIntoConvQAsymmU8CpuAccTest")
712 {
713  ActivationDescriptor activationDescriptor;
714  activationDescriptor.m_Function = ActivationFunction::ReLu;
715 
716  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
717  (activationDescriptor, 0.0001f, Compute::CpuAcc);
718 }
719 TEST_CASE("FuseReLUIntoDWConvQAsymmU8CpuAccTest")
720 {
721  ActivationDescriptor activationDescriptor;
722  activationDescriptor.m_Function = ActivationFunction::ReLu;
723 
724  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
725  (activationDescriptor, 0.0001f, Compute::CpuAcc);
726 }
727 TEST_CASE("FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
728 {
729  ActivationDescriptor activationDescriptor;
730  activationDescriptor.m_Function = ActivationFunction::ReLu;
731 
732  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
733  (activationDescriptor, 0.0001f, Compute::CpuAcc);
734 }
735 
736 // BoundedReLu fused into Receiver Layers QAsymmS8
737 TEST_CASE("FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
738 {
739  ActivationDescriptor activationDescriptor;
740  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
741  activationDescriptor.m_A = 6.0f;
742  activationDescriptor.m_B = 0.0f;
743 
744  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
745  (activationDescriptor, 0.0001f, Compute::CpuAcc);
746 }
747 TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
748 {
749  ActivationDescriptor activationDescriptor;
750  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
751  activationDescriptor.m_A = 6.0f;
752  activationDescriptor.m_B = 0.0f;
753 
754  FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
755  (activationDescriptor, 0.0001f, Compute::CpuAcc);
756 }
757 TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
758 {
759  ActivationDescriptor activationDescriptor;
760  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
761  activationDescriptor.m_A = 6.0f;
762  activationDescriptor.m_B = 0.0f;
763 
764  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
765  (activationDescriptor, 0.0001f, Compute::CpuAcc);
766 }
767 
768 // TanH fused into Receiver Layers Float32
769 TEST_CASE("FuseTanHIntoConvFloat32CpuAccTest")
770 {
771  ActivationDescriptor activationDescriptor;
772  activationDescriptor.m_Function = ActivationFunction::TanH;
773 
774  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
775  (activationDescriptor, 0.0001f, Compute::CpuAcc);
776 }
777 
778 // HardSwish fused into Receiver Layers Float32
779 TEST_CASE("FuseHardSwishIntoConvFloat32CpuAccTest")
780 {
781  ActivationDescriptor activationDescriptor;
782  activationDescriptor.m_Function = ActivationFunction::HardSwish;
783 
784  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
785  (activationDescriptor, 0.0001f, Compute::CpuAcc);
786 }
787 
788 // Test that all receiver layers follow by all activation layers work, either fused or not fused
789 TEST_CASE("LayerFollowedByActivationFloat32CpuAccTest")
790 {
791  ActivationDescriptor activationDescriptor;
792  for (int i = 0; i != 12; ++i)
793  {
794  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
795  activationDescriptor.m_A = 1.0f;
796  activationDescriptor.m_B = -1.0f;
797  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
798  (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
799  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
800  (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
801  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
802  (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
803  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
804  (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
805  }
806 }
807 TEST_CASE("LayerFollowedByActivationFloat16CpuAccTest")
808 {
809  ActivationDescriptor activationDescriptor;
810  for (int i = 0; i != 12; ++i)
811  {
812  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
813  activationDescriptor.m_A = 1.0f;
814  activationDescriptor.m_B = -1.0f;
815  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
816  (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
817  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
818  (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
819  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
820  (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
821  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
822  (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
823  }
824 }
825 TEST_CASE("LayerFollowedByActivationQAsymmU8CpuAccTest")
826 {
827  ActivationDescriptor activationDescriptor;
828 
829  activationDescriptor.m_Function = ActivationFunction::Sigmoid;
830  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
831  (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
832  static_cast<int>(activationDescriptor.m_Function));
833  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
834  (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
835  static_cast<int>(activationDescriptor.m_Function));
836 
837  activationDescriptor.m_Function = ActivationFunction::TanH;
838  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
839  (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
840  static_cast<int>(activationDescriptor.m_Function));
841  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
842  (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
843  static_cast<int>(activationDescriptor.m_Function));
844 
845  activationDescriptor.m_Function = ActivationFunction::ReLu;
846  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
847  (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
848  static_cast<int>(activationDescriptor.m_Function));
849  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
850  (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
851  static_cast<int>(activationDescriptor.m_Function));
852 
853  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
854  activationDescriptor.m_A = 1.0f;
855  activationDescriptor.m_B = -1.0f;
856  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
857  (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
858  static_cast<int>(activationDescriptor.m_Function));
859  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
860  (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
861  static_cast<int>(activationDescriptor.m_Function));
862 
863  activationDescriptor.m_Function = ActivationFunction::HardSwish;
864  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
865  (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
866  static_cast<int>(activationDescriptor.m_Function));
867  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
868  (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
869  static_cast<int>(activationDescriptor.m_Function));
870 }
871 }
872 #endif
873 
874 #if defined(ARMCOMPUTECL_ENABLED)
875 TEST_SUITE("Optimizer")
876 {
877 // ReLu fused into Receiver Layers Float32
878 TEST_CASE("FuseReLUIntoConvFloat32GpuAccTest")
879 {
880  ActivationDescriptor activationDescriptor;
881  activationDescriptor.m_Function = ActivationFunction::ReLu;
882 
883  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
884  (activationDescriptor, 0.0001f, Compute::GpuAcc);
885 }
886 TEST_CASE("FuseReLUIntoDWConvFloat32GpuAccTest")
887 {
888  ActivationDescriptor activationDescriptor;
889  activationDescriptor.m_Function = ActivationFunction::ReLu;
890 
891  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
892  (activationDescriptor, 0.0001f, Compute::GpuAcc);
893 }
894 TEST_CASE("FuseReLUIntoFullyConnectedFloat32GpuAccTest")
895 {
896  ActivationDescriptor activationDescriptor;
897  activationDescriptor.m_Function = ActivationFunction::ReLu;
898 
899  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
900  (activationDescriptor, 0.0001f, Compute::GpuAcc);
901 }
902 TEST_CASE("FuseReLUIntoBatchNormFloat32GpuAccTest")
903 {
904  ActivationDescriptor activationDescriptor;
905  activationDescriptor.m_Function = ActivationFunction::ReLu;
906 
907  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
908  (activationDescriptor, 0.0001f, Compute::GpuAcc);
909 }
910 TEST_CASE("FuseReLUIntoMulFloat32GpuAccTest")
911 {
912  ActivationDescriptor activationDescriptor;
913  activationDescriptor.m_Function = ActivationFunction::ReLu;
914 
915  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
916  (activationDescriptor, 0.0001f, Compute::GpuAcc);
917 }
918 TEST_CASE("FuseReLUIntoAddFloat32GpuAccTest")
919 {
920  ActivationDescriptor activationDescriptor;
921  activationDescriptor.m_Function = ActivationFunction::ReLu;
922 
923  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
924  (activationDescriptor, 0.0001f, Compute::GpuAcc);
925 }
926 TEST_CASE("FuseReLUIntoSubFloat32GpuAccTest")
927 {
928  ActivationDescriptor activationDescriptor;
929  activationDescriptor.m_Function = ActivationFunction::ReLu;
930 
931  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
932  (activationDescriptor, 0.0001f, Compute::GpuAcc);
933 }
934 TEST_CASE("FuseReLUIntoDivFloat32GpuAccTest")
935 {
936  ActivationDescriptor activationDescriptor;
937  activationDescriptor.m_Function = ActivationFunction::ReLu;
938 
939  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
940  (activationDescriptor, 0.0001f, Compute::GpuAcc);
941 }
942 
943 // BoundedReLu fused into Receiver Layers Float32
944 TEST_CASE("FuseBoundedReLUIntoConvFloat32GpuAccTest")
945 {
946  ActivationDescriptor activationDescriptor;
947  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
948  activationDescriptor.m_A = 1.0f;
949  activationDescriptor.m_B = -1.0f;
950 
951  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
952  (activationDescriptor, 0.0001f, Compute::GpuAcc);
953 }
954 TEST_CASE("FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
955 {
956  ActivationDescriptor activationDescriptor;
957  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
958  activationDescriptor.m_A = 1.0f;
959  activationDescriptor.m_B = -1.0f;
960 
961  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
962  (activationDescriptor, 0.0001f, Compute::GpuAcc);
963 }
964 TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
965 {
966  ActivationDescriptor activationDescriptor;
967  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
968  activationDescriptor.m_A = 1.0f;
969  activationDescriptor.m_B = -1.0f;
970 
971  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
972  (activationDescriptor, 0.0001f, Compute::GpuAcc);
973 }
974 TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
975 {
976  ActivationDescriptor activationDescriptor;
977  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
978  activationDescriptor.m_A = 1.0f;
979  activationDescriptor.m_B = -1.0f;
980 
981  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
982  (activationDescriptor, 0.0001f, Compute::GpuAcc);
983 }
984 TEST_CASE("FuseBoundedReLUIntoMulFloat32GpuAccTest")
985 {
986  ActivationDescriptor activationDescriptor;
987  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
988  activationDescriptor.m_A = 1.0f;
989  activationDescriptor.m_B = -1.0f;
990 
991  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
992  (activationDescriptor, 0.0001f, Compute::GpuAcc);
993 }
994 TEST_CASE("FuseBoundedReLUIntoAddFloat32GpuAccTest")
995 {
996  ActivationDescriptor activationDescriptor;
997  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
998  activationDescriptor.m_A = 1.0f;
999  activationDescriptor.m_B = -1.0f;
1000 
1001  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1002  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1003 }
1004 TEST_CASE("FuseBoundedReLUIntoSubFloat32GpuAccTest")
1005 {
1006  ActivationDescriptor activationDescriptor;
1007  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1008  activationDescriptor.m_A = 1.0f;
1009  activationDescriptor.m_B = -1.0f;
1010 
1011  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1012  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1013 }
1014 TEST_CASE("FuseBoundedReLUIntoDivFloat32GpuAccTest")
1015 {
1016  ActivationDescriptor activationDescriptor;
1017  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1018  activationDescriptor.m_A = 1.0f;
1019  activationDescriptor.m_B = -1.0f;
1020 
1021  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1022  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1023 }
1024 
1025 // ReLu fused into Receiver Layers Float16
1026 TEST_CASE("FuseReLUIntoConvFloat16GpuAccTest")
1027 {
1028  ActivationDescriptor activationDescriptor;
1029  activationDescriptor.m_Function = ActivationFunction::ReLu;
1030 
1031  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1032  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1033 }
1034 TEST_CASE("FuseReLUIntoDWConvFloat16GpuAccTest")
1035 {
1036  ActivationDescriptor activationDescriptor;
1037  activationDescriptor.m_Function = ActivationFunction::ReLu;
1038 
1039  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1040  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1041 }
1042 TEST_CASE("FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1043 {
1044  ActivationDescriptor activationDescriptor;
1045  activationDescriptor.m_Function = ActivationFunction::ReLu;
1046 
1047  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1048  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1049 }
1050 TEST_CASE("FuseReLUIntoBatchNormFloat16GpuAccTest")
1051 {
1052  ActivationDescriptor activationDescriptor;
1053  activationDescriptor.m_Function = ActivationFunction::ReLu;
1054 
1055  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1056  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1057 }
1058 TEST_CASE("FuseReLUIntoMulFloat16GpuAccTest")
1059 {
1060  ActivationDescriptor activationDescriptor;
1061  activationDescriptor.m_Function = ActivationFunction::ReLu;
1062 
1063  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1064  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1065 }
1066 TEST_CASE("FuseReLUIntoAddFloat16GpuAccTest")
1067 {
1068  ActivationDescriptor activationDescriptor;
1069  activationDescriptor.m_Function = ActivationFunction::ReLu;
1070 
1071  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
1072  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1073 }
1074 TEST_CASE("FuseReLUIntoSubFloat16GpuAccTest")
1075 {
1076  ActivationDescriptor activationDescriptor;
1077  activationDescriptor.m_Function = ActivationFunction::ReLu;
1078 
1079  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1080  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1081 }
1082 TEST_CASE("FuseReLUIntoDivFloat16GpuAccTest")
1083 {
1084  ActivationDescriptor activationDescriptor;
1085  activationDescriptor.m_Function = ActivationFunction::ReLu;
1086 
1087  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
1088  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1089 }
1090 
1091 // ReLU fused into Receiver Layers QAsymmU8
1092 TEST_CASE("FuseReLUQIntoConvAsymmU8GpuAccTest")
1093 {
1094  ActivationDescriptor activationDescriptor;
1095  activationDescriptor.m_Function = ActivationFunction::ReLu;
1096 
1097  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1098  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1099 }
1100 TEST_CASE("FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1101 {
1102  ActivationDescriptor activationDescriptor;
1103  activationDescriptor.m_Function = ActivationFunction::ReLu;
1104 
1105  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1106  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1107 }
1108 TEST_CASE("FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1109 {
1110  ActivationDescriptor activationDescriptor;
1111  activationDescriptor.m_Function = ActivationFunction::ReLu;
1112 
1113  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1114  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1115 }
1116 
1117 // BoundedReLu fused into Receiver Layers QAsymmS8
1118 TEST_CASE("FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1119 {
1120  ActivationDescriptor activationDescriptor;
1121  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1122  activationDescriptor.m_A = 6.0f;
1123  activationDescriptor.m_B = 0.0f;
1124 
1125  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1126  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1127 }
1128 TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1129 {
1130  ActivationDescriptor activationDescriptor;
1131  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1132  activationDescriptor.m_A = 6.0f;
1133  activationDescriptor.m_B = 0.0f;
1134 
1135  FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
1136  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1137 }
1138 TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1139 {
1140  ActivationDescriptor activationDescriptor;
1141  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1142  activationDescriptor.m_A = 6.0f;
1143  activationDescriptor.m_B = 0.0f;
1144 
1145  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1146  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1147 }
1148 
1149 // TanH fused into Receiver Layers Float32
1150 TEST_CASE("FuseTanHIntoConvFloat32GpuAccTest")
1151 {
1152  ActivationDescriptor activationDescriptor;
1153  activationDescriptor.m_Function = ActivationFunction::TanH;
1154 
1155  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1156  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1157 }
1158 TEST_CASE("FuseTanHIntoMulFloat32GpuAccTest")
1159 {
1160  ActivationDescriptor activationDescriptor;
1161  activationDescriptor.m_Function = ActivationFunction::TanH;
1162 
1163  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1164  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1165 }
1166 TEST_CASE("FuseTanHIntoAddFloat32GpuAccTest")
1167 {
1168  ActivationDescriptor activationDescriptor;
1169  activationDescriptor.m_Function = ActivationFunction::TanH;
1170 
1171  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1172  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1173 }
1174 TEST_CASE("FuseTanHIntoSubFloat32GpuAccTest")
1175 {
1176  ActivationDescriptor activationDescriptor;
1177  activationDescriptor.m_Function = ActivationFunction::TanH;
1178 
1179  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1180  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1181 }
1182 TEST_CASE("FuseTanHIntoDivFloat32GpuAccTest")
1183 {
1184  ActivationDescriptor activationDescriptor;
1185  activationDescriptor.m_Function = ActivationFunction::TanH;
1186 
1187  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1188  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1189 }
1190 
1191 // HardSwish fused into Receiver Layers Float32
1192 TEST_CASE("FuseHardSwishIntoConvFloat32GpuAccTest")
1193 {
1194  ActivationDescriptor activationDescriptor;
1195  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1196 
1197  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1198  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1199 }
1200 TEST_CASE("FuseHardSwishIntoMulFloat32GpuAccTest")
1201 {
1202  ActivationDescriptor activationDescriptor;
1203  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1204 
1205  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1206  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1207 }
1208 TEST_CASE("FuseHardSwishIntoAddFloat32GpuAccTest")
1209 {
1210  ActivationDescriptor activationDescriptor;
1211  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1212 
1213  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1214  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1215 }
1216 TEST_CASE("FuseHardSwishIntoSubFloat32GpuAccTest")
1217 {
1218  ActivationDescriptor activationDescriptor;
1219  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1220 
1221  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1222  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1223 }
1224 TEST_CASE("FuseHardSwishIntoDivFloat32GpuAccTest")
1225 {
1226  ActivationDescriptor activationDescriptor;
1227  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1228 
1229  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1230  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1231 }
1232 
1233 // Test that all receiver layers follow by all activation layers work, either fused or not fused
1234 TEST_CASE("LayerFollowedByActivationFloat32GpuAccTest")
1235 {
1236  ActivationDescriptor activationDescriptor;
1237  for (int i = 0; i != 12; ++i)
1238  {
1239  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1240  activationDescriptor.m_A = 1.0f;
1241  activationDescriptor.m_B = -1.0f;
1242  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1243  {
1244  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1245  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1246  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
1247  (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
1248  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
1249  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1250  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
1251  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1252  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1253  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1254  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
1255  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1256  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1257  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1258  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
1259  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1260  }
1261  }
1262 }
1263 TEST_CASE("LayerFollowedByActivationFloat16GpuAccTest")
1264 {
1265  ActivationDescriptor activationDescriptor;
1266  for (int i = 0; i != 12; ++i)
1267  {
1268  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1269  activationDescriptor.m_A = 1.0f;
1270  activationDescriptor.m_B = -1.0f;
1271  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1272  {
1273  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1274  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1275  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1276  (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
1277  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1278  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1279  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1280  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1281  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1282  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1283  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
1284  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1285  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1286  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1287  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
1288  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1289  }
1290  }
1291 }
1292 TEST_CASE("LayerFollowedByActivationQAsymmU8GpuAccTest")
1293 {
1294  ActivationDescriptor activationDescriptor;
1295 
1296  activationDescriptor.m_Function = ActivationFunction::Sigmoid;
1297  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1298  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
1299  static_cast<int>(activationDescriptor.m_Function));
1300  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1301  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
1302  static_cast<int>(activationDescriptor.m_Function));
1303 
1304  activationDescriptor.m_Function = ActivationFunction::TanH;
1305  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1306  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
1307  static_cast<int>(activationDescriptor.m_Function));
1308  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1309  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
1310  static_cast<int>(activationDescriptor.m_Function));
1311 
1312  activationDescriptor.m_Function = ActivationFunction::ReLu;
1313  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1314  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1315  static_cast<int>(activationDescriptor.m_Function));
1316  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1317  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1318  static_cast<int>(activationDescriptor.m_Function));
1319 
1320  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1321  activationDescriptor.m_A = 1.0f;
1322  activationDescriptor.m_B = -1.0f;
1323  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1324  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1325  static_cast<int>(activationDescriptor.m_Function));
1326  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1327  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1328  static_cast<int>(activationDescriptor.m_Function));
1329 
1330  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1331  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1332  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1333  static_cast<int>(activationDescriptor.m_Function));
1334  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1335  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1336  static_cast<int>(activationDescriptor.m_Function));
1337 }
1338 }
1339 #endif
TEST_SUITE("TestConstTensorLayerVisitor")
IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)
Adds a subtraction layer to the network.
Definition: Network.cpp:337
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:49
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:66
bool m_BiasEnabled
Enable/disable bias.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
Definition: TestUtils.hpp:21
armnn::Layer * GetFirstLayerWithName(armnn::Graph &graph, const std::string &name)
Definition: GraphUtils.cpp:22
IConnectableLayer * AddConstantLayer(const ConstTensor &input, const char *name=nullptr)
Adds a layer with no inputs and a single output, which always corresponds to the passed in constant t...
Definition: Network.cpp:292
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D depthwise convolution layer to the network.
Definition: Network.cpp:118
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:33
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
Main network class which provides the interface for building up a neural network. ...
Definition: INetwork.hpp:249
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
IConnectableLayer * AddDivisionLayer(const char *name=nullptr)
Adds a division layer to the network.
Definition: Network.cpp:332
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
const IOutputSlot * GetConnection() const override
Definition: Layer.hpp:204
IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const char *name=nullptr)
Adds a fully connected layer to the network.
Definition: Network.cpp:166
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, bool constantWeights)
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
const InputSlot & GetInputSlot(unsigned int index) const override
Get a const input slot handle by slot index.
Definition: Layer.hpp:322
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is now deprecated and is being replaced by BackendId.
Definition: BackendId.hpp:21
DataType
Definition: Types.hpp:48
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1847
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
int NetworkId
Definition: IRuntime.hpp:27
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:242
GPU Execution: OpenCL: ArmCompute.
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:36
min(a, max(b, input)) ReLu1 & ReLu6.
IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)
Adds a batch normalization layer to the network.
Definition: Network.cpp:247
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
This layer represents an addition operation.
Graph & GetGraphForTesting(IOptimizedNetwork *optNet)
Definition: TestUtils.cpp:49
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
Definition: Descriptors.hpp:61
IConnectableLayer * AddAdditionLayer(const char *name=nullptr)
Adds an addition layer to the network.
Definition: Network.cpp:237
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const char *name=nullptr)
Adds a 2D convolution layer to the network.
Definition: Network.cpp:85
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:514
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)
Adds a multiplication layer to the network.
Definition: Network.cpp:242
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:241
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:476
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:63
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:59
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
ActivationFunction
Definition: Types.hpp:86
LayerType
When adding a new layer, adapt also the LastLayer enum value in the enum class LayerType below...
Definition: Types.hpp:467