// FuseActivationTests.cpp (ArmNN 21.08)
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LayersFwd.hpp"

#include <Network.hpp>
#include <ResolveType.hpp>
#include <armnn/INetwork.hpp>
#include "test/GraphUtils.hpp"
#include <test/TestUtils.hpp>

#include <doctest/doctest.h>

#include <QuantizeHelper.hpp>

#include <iostream> // for std::cerr in FuseActivationSimpleTest
#include <string>
using namespace armnn;

namespace
{
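
// Builds a vector of `size` values of type T that starts at `initial` and
// increases by `increment` per element; used below to generate test input
// data and batch-norm parameters.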
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<T> vector(size);

    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}
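
// Each *Test struct below is a small policy type describing one receiver
// layer for the fusing tests: the layer it adds (LayerType/AddReceiverLayer),
// its input/output/weights shapes, whether it takes two inputs
// (isElementWise), and whether its weights arrive as separate constant
// layers (isConstTensorAsInputSupported).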
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct Convolution2dTest
{
    using LayerType = Convolution2dLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 4}); } // NHWCout
    static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        Convolution2dDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;
        descriptor.m_StrideX    = 1;
        descriptor.m_StrideY    = 1;

        std::vector<float> weightsData = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                           11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                           21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                           31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DWConvolution2dTest
{
public:
    using LayerType = DepthwiseConvolution2dLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); }  // [N,H,W,Cin]
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 12}); } // [N,H,W,Cout]
    static TensorShape GetWeightsShape() { return TensorShape( {1, 2, 2, 12}); } // [1,H,W,Cout]

    constexpr static const unsigned int inputSize  = 48;  // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 108; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        DepthwiseConvolution2dDescriptor descriptor;
        descriptor.m_BiasEnabled = false;
        descriptor.m_DataLayout  = DataLayout::NHWC;
        descriptor.m_StrideX     = 1;
        descriptor.m_StrideY     = 1;

        std::vector<float> weightsData = {  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                           11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                           21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                           31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42 };
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct FullyConnectedTest
{
public:
    using LayerType = FullyConnectedLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = true;

    static TensorShape GetInputShape()   { return TensorShape( {2, 5, 1, 1}); } // NCinHW
    static TensorShape GetOutputShape()  { return TensorShape( {2, 3}); }       // NCout
    static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); }       // CinCout

    constexpr static const unsigned int inputSize  = 10; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 6;  // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        FullyConnectedDescriptor descriptor;
        descriptor.m_BiasEnabled = false;

        return network->AddFullyConnectedLayer(descriptor, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        std::vector<float> weightsData = {  1,  2,  3,  4,  5,
                                            6,  7,  8,  9, 10,
                                           11, 12, 13, 14, 15 };
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset, true);
        ConstTensor weights(weightsInfo, weightsVector);

        IConnectableLayer* weightsLayer = network->AddConstantLayer(weights, "Weights");
        weightsLayer->GetOutputSlot(0).SetTensorInfo(weightsInfo);

        std::vector<IConnectableLayer*> layers = { weightsLayer };
        return layers;
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct BatchNormTest
{
public:
    using LayerType = BatchNormalizationLayer;
    static const bool isElementWise = false;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        BatchNormalizationDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;

        std::vector<T> betaVector     = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
        std::vector<T> gammaVector    = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
        std::vector<T> meanVector     = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
        std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);

        const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
        ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
        ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
        ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
        ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);

        return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct MultiplicationTest
{
    using LayerType = MultiplicationLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddMultiplicationLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct AdditionTest
{
    using LayerType = AdditionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddAdditionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct SubtractionTest
{
    using LayerType = SubtractionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddSubtractionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DivisionTest
{
    using LayerType = DivisionLayer;
    static const bool isElementWise = true;
    static const bool isConstTensorAsInputSupported = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); } // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddDivisionLayer(name);
    }

    static std::vector<IConnectableLayer*> AddConstantLayers(INetwork* network,
                                                             float scale = 1.f,
                                                             int32_t offset = 0)
    {
        IgnoreUnused(network);
        IgnoreUnused(scale);
        IgnoreUnused(offset);
        return {};
    }
};
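
// Builds the graph under test: input -> receiver -> activation -> output.
// When preventFusing is true the receiver's output is also wired to a second
// output layer, so the optimizer cannot fold the activation into the
// receiver.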
template<typename LayerTest,
         DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
                          float scale, int32_t offset)
{
    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
                                                                   "receiverLayer",
                                                                   scale,
                                                                   offset);

    IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                     "activation");

    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        std::vector<IConnectableLayer*> constantLayers = LayerTest::AddConstantLayers(network.get(),
                                                                                      scale,
                                                                                      offset);

        // Connect constant layers to receiverLayer.
        for (unsigned int i = 0; i < constantLayers.size(); ++i)
        {
            constantLayers[i]->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(i + 1));
        }
    }

    // Define layers information
    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);

    // Set layer information
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
    receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (LayerTest::isElementWise)
    {
        inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
    }
    if (preventFusing)
    {
        receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}
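
// Runs the same graph twice, once fusable and once with fusing prevented,
// verifies the expected optimized graph structure in each case, and then
// checks that both executions produce the same output within `tolerance`.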
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
                                         float scale = 1.f, int32_t offset = 0)
{
    // FIRST NETWORK: Fused
    // Construct ArmNN network
    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

    Graph& graphFused = GetGraphForTesting(optNetFused.get());

    auto checkFusedConv2d = [](const Layer* const layer) -> bool {
        return IsLayerOfType<LayerType>(layer) &&
               (layer->GetNameStr() == "fused-activation-into-receiverLayer");
    };

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(4 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));

        // Check that the new constant layer is connected to the fused receiver layer.
        Layer* fusedReceiverLayer = GetFirstLayerWithName(graphFused, "fused-activation-into-receiverLayer");
        CHECK(fusedReceiverLayer);
        CHECK(fusedReceiverLayer->GetInputSlot(1).GetConnection() != nullptr);
    }
    else
    {
        CHECK(3 == graphFused.GetNumLayers());
        CHECK(CheckSequence(graphFused.cbegin(),
                            graphFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            checkFusedConv2d,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifier;
    CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

    // Create structures for inputs and outputs.
    std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataFused(LayerTest::outputSize);

    InputTensors inputTensorsFused{
        {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
    OutputTensors outputTensorsFused{
        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

    // Execute network
    CHECK(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);

    // SECOND NETWORK: NotFused
    // Construct ArmNN network
    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());

    Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());

    // If ConstTensorAsInputs is supported, weights and bias are stored as constant layers.
    if (LayerTest::isConstTensorAsInputSupported)
    {
        CHECK(6 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<ConstantLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }
    else
    {
        CHECK(5 == graphNotFused.GetNumLayers());
        CHECK(CheckSequence(graphNotFused.cbegin(),
                            graphNotFused.cend(),
                            &IsLayerOfType<InputLayer>,
                            &IsLayerOfType<LayerType>,
                            &IsLayerOfType<ActivationLayer>,
                            &IsLayerOfType<OutputLayer>,
                            &IsLayerOfType<OutputLayer>));
    }

    // Load network into runtime
    NetworkId networkIdentifierNotFused;
    CHECK(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);

    // Create structures for inputs and outputs.
    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataNotFused(LayerTest::outputSize);
    std::vector<T> outputData2NotFused(LayerTest::outputSize);

    InputTensors inputTensorsNotFused{
        {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
    OutputTensors outputTensorsNotFused{
        {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
        {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};

    // Execute network
    CHECK(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
          == Status::Success);

    // Check that the output of the fused activation matches the output of the activation in the "NotFused" network
    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
    {
        auto outputNotFused = static_cast<float>(outputDataNotFused[n]);
        CHECK(static_cast<float>(outputDataFused[n]) == doctest::Approx(outputNotFused).epsilon(tolerance));
    }
}
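
// Smoke test: builds, optimizes and runs the fusable graph, returning false
// if an exception is thrown; used below to sweep every ActivationFunction
// against every receiver layer, whether or not the backend actually fuses.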
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
                              float scale = 1.f, int32_t offset = 0)
{
    bool success;
    try
    {
        // Construct ArmNN network
        INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

        // Create ArmNN runtime
        IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

        // Optimise ArmNN network
        IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

        // Load network into runtime
        NetworkId networkIdentifier;
        CHECK(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

        // Create structures for inputs and outputs.
        std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
        std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
        std::vector<T> outputDataFused(LayerTest::outputSize);

        InputTensors inputTensorsFused{
            {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
        OutputTensors outputTensorsFused{
            {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);

        success = true;
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        success = false;
    }

    return success;
}

} // anonymous namespace
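
// The test suites below are compiled only when the corresponding Arm Compute
// backend is enabled: Neon (CpuAcc) here, OpenCL (GpuAcc) further down.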
#if defined(ARMCOMPUTENEON_ENABLED)
TEST_SUITE("Optimizer")
{
// ReLu fused into Receiver Layers Float32
TEST_CASE("FuseReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers Float32
TEST_CASE("FuseBoundedReLUIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// ReLU fused into Receiver Layers QAsymmU8
TEST_CASE("FuseReLUIntoConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoDWConvQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers QAsymmS8
TEST_CASE("FuseBoundedReLUIntoConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// TanH fused into Receiver Layers Float32
TEST_CASE("FuseTanHIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// HardSwish fused into Receiver Layers Float32
TEST_CASE("FuseHardSwishIntoConvFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// Test that all receiver layers followed by all activation functions work, either fused or not fused
TEST_CASE("LayerFollowedByActivationFloat32CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
TEST_CASE("LayerFollowedByActivationFloat16CpuAccTest")
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
TEST_CASE("LayerFollowedByActivationQAsymmU8CpuAccTest")
{
    ActivationDescriptor activationDescriptor;

    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::TanH;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::ReLu;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::HardSwish;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
}
}
#endif
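
// The GpuAcc suite additionally covers fusing into the element-wise
// receivers (Mul, Add, Sub, Div).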
858 #if defined(ARMCOMPUTECL_ENABLED)
859 TEST_SUITE("Optimizer")
860 {
861 // ReLu fused into Receiver Layers Float32
862 TEST_CASE("FuseReLUIntoConvFloat32GpuAccTest")
863 {
864  ActivationDescriptor activationDescriptor;
865  activationDescriptor.m_Function = ActivationFunction::ReLu;
866 
867  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
868  (activationDescriptor, 0.0001f, Compute::GpuAcc);
869 }
870 TEST_CASE("FuseReLUIntoDWConvFloat32GpuAccTest")
871 {
872  ActivationDescriptor activationDescriptor;
873  activationDescriptor.m_Function = ActivationFunction::ReLu;
874 
875  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
876  (activationDescriptor, 0.0001f, Compute::GpuAcc);
877 }
878 TEST_CASE("FuseReLUIntoFullyConnectedFloat32GpuAccTest")
879 {
880  ActivationDescriptor activationDescriptor;
881  activationDescriptor.m_Function = ActivationFunction::ReLu;
882 
883  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
884  (activationDescriptor, 0.0001f, Compute::GpuAcc);
885 }
886 TEST_CASE("FuseReLUIntoBatchNormFloat32GpuAccTest")
887 {
888  ActivationDescriptor activationDescriptor;
889  activationDescriptor.m_Function = ActivationFunction::ReLu;
890 
891  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
892  (activationDescriptor, 0.0001f, Compute::GpuAcc);
893 }
894 TEST_CASE("FuseReLUIntoMulFloat32GpuAccTest")
895 {
896  ActivationDescriptor activationDescriptor;
897  activationDescriptor.m_Function = ActivationFunction::ReLu;
898 
899  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
900  (activationDescriptor, 0.0001f, Compute::GpuAcc);
901 }
902 TEST_CASE("FuseReLUIntoAddFloat32GpuAccTest")
903 {
904  ActivationDescriptor activationDescriptor;
905  activationDescriptor.m_Function = ActivationFunction::ReLu;
906 
907  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
908  (activationDescriptor, 0.0001f, Compute::GpuAcc);
909 }
910 TEST_CASE("FuseReLUIntoSubFloat32GpuAccTest")
911 {
912  ActivationDescriptor activationDescriptor;
913  activationDescriptor.m_Function = ActivationFunction::ReLu;
914 
915  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
916  (activationDescriptor, 0.0001f, Compute::GpuAcc);
917 }
918 TEST_CASE("FuseReLUIntoDivFloat32GpuAccTest")
919 {
920  ActivationDescriptor activationDescriptor;
921  activationDescriptor.m_Function = ActivationFunction::ReLu;
922 
923  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
924  (activationDescriptor, 0.0001f, Compute::GpuAcc);
925 }
926 
927 // BoundedReLu fused into Receiver Layers Float32
928 TEST_CASE("FuseBoundedReLUIntoConvFloat32GpuAccTest")
929 {
930  ActivationDescriptor activationDescriptor;
931  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
932  activationDescriptor.m_A = 1.0f;
933  activationDescriptor.m_B = -1.0f;
934 
935  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
936  (activationDescriptor, 0.0001f, Compute::GpuAcc);
937 }
938 TEST_CASE("FuseBoundedReLUIntoDWConvFloat32GpuAccTest")
939 {
940  ActivationDescriptor activationDescriptor;
941  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
942  activationDescriptor.m_A = 1.0f;
943  activationDescriptor.m_B = -1.0f;
944 
945  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
946  (activationDescriptor, 0.0001f, Compute::GpuAcc);
947 }
948 TEST_CASE("FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest")
949 {
950  ActivationDescriptor activationDescriptor;
951  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
952  activationDescriptor.m_A = 1.0f;
953  activationDescriptor.m_B = -1.0f;
954 
955  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
956  (activationDescriptor, 0.0001f, Compute::GpuAcc);
957 }
958 TEST_CASE("FuseBoundedReLUIntoBatchNormFloat32GpuAccTest")
959 {
960  ActivationDescriptor activationDescriptor;
961  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
962  activationDescriptor.m_A = 1.0f;
963  activationDescriptor.m_B = -1.0f;
964 
965  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
966  (activationDescriptor, 0.0001f, Compute::GpuAcc);
967 }
968 TEST_CASE("FuseBoundedReLUIntoMulFloat32GpuAccTest")
969 {
970  ActivationDescriptor activationDescriptor;
971  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
972  activationDescriptor.m_A = 1.0f;
973  activationDescriptor.m_B = -1.0f;
974 
975  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
976  (activationDescriptor, 0.0001f, Compute::GpuAcc);
977 }
978 TEST_CASE("FuseBoundedReLUIntoAddFloat32GpuAccTest")
979 {
980  ActivationDescriptor activationDescriptor;
981  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
982  activationDescriptor.m_A = 1.0f;
983  activationDescriptor.m_B = -1.0f;
984 
985  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
986  (activationDescriptor, 0.0001f, Compute::GpuAcc);
987 }
988 TEST_CASE("FuseBoundedReLUIntoSubFloat32GpuAccTest")
989 {
990  ActivationDescriptor activationDescriptor;
991  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
992  activationDescriptor.m_A = 1.0f;
993  activationDescriptor.m_B = -1.0f;
994 
995  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
996  (activationDescriptor, 0.0001f, Compute::GpuAcc);
997 }
998 TEST_CASE("FuseBoundedReLUIntoDivFloat32GpuAccTest")
999 {
1000  ActivationDescriptor activationDescriptor;
1001  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1002  activationDescriptor.m_A = 1.0f;
1003  activationDescriptor.m_B = -1.0f;
1004 
1005  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1006  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1007 }
1008 
1009 // ReLu fused into Receiver Layers Float16
1010 TEST_CASE("FuseReLUIntoConvFloat16GpuAccTest")
1011 {
1012  ActivationDescriptor activationDescriptor;
1013  activationDescriptor.m_Function = ActivationFunction::ReLu;
1014 
1015  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1016  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1017 }
1018 TEST_CASE("FuseReLUIntoDWConvFloat16GpuAccTest")
1019 {
1020  ActivationDescriptor activationDescriptor;
1021  activationDescriptor.m_Function = ActivationFunction::ReLu;
1022 
1023  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1024  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1025 }
1026 TEST_CASE("FuseReLUIntoFullyConnectedFloat16GpuAccTest")
1027 {
1028  ActivationDescriptor activationDescriptor;
1029  activationDescriptor.m_Function = ActivationFunction::ReLu;
1030 
1031  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1032  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1033 }
1034 TEST_CASE("FuseReLUIntoBatchNormFloat16GpuAccTest")
1035 {
1036  ActivationDescriptor activationDescriptor;
1037  activationDescriptor.m_Function = ActivationFunction::ReLu;
1038 
1039  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1040  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1041 }
1042 TEST_CASE("FuseReLUIntoMulFloat16GpuAccTest")
1043 {
1044  ActivationDescriptor activationDescriptor;
1045  activationDescriptor.m_Function = ActivationFunction::ReLu;
1046 
1047  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1048  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1049 }
1050 TEST_CASE("FuseReLUIntoAddFloat16GpuAccTest")
1051 {
1052  ActivationDescriptor activationDescriptor;
1053  activationDescriptor.m_Function = ActivationFunction::ReLu;
1054 
1055  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
1056  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1057 }
1058 TEST_CASE("FuseReLUIntoSubFloat16GpuAccTest")
1059 {
1060  ActivationDescriptor activationDescriptor;
1061  activationDescriptor.m_Function = ActivationFunction::ReLu;
1062 
1063  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1064  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1065 }
1066 TEST_CASE("FuseReLUIntoDivFloat16GpuAccTest")
1067 {
1068  ActivationDescriptor activationDescriptor;
1069  activationDescriptor.m_Function = ActivationFunction::ReLu;
1070 
1071  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
1072  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1073 }
1074 
1075 // ReLU fused into Receiver Layers QAsymmU8
1076 TEST_CASE("FuseReLUQIntoConvAsymmU8GpuAccTest")
1077 {
1078  ActivationDescriptor activationDescriptor;
1079  activationDescriptor.m_Function = ActivationFunction::ReLu;
1080 
1081  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1082  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1083 }
1084 TEST_CASE("FuseReLUQIntoDWConvAsymmU8GpuAccTest")
1085 {
1086  ActivationDescriptor activationDescriptor;
1087  activationDescriptor.m_Function = ActivationFunction::ReLu;
1088 
1089  FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1090  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1091 }
1092 TEST_CASE("FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest")
1093 {
1094  ActivationDescriptor activationDescriptor;
1095  activationDescriptor.m_Function = ActivationFunction::ReLu;
1096 
1097  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1098  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1099 }
1100 
1101 // BoundedReLu fused into Receiver Layers QAsymmS8
1102 TEST_CASE("FuseBoundedReLUIntoConvQASymmS8GpuAccTest")
1103 {
1104  ActivationDescriptor activationDescriptor;
1105  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1106  activationDescriptor.m_A = 6.0f;
1107  activationDescriptor.m_B = 0.0f;
1108 
1109  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1110  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1111 }
1112 TEST_CASE("FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest")
1113 {
1114  ActivationDescriptor activationDescriptor;
1115  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1116  activationDescriptor.m_A = 6.0f;
1117  activationDescriptor.m_B = 0.0f;
1118 
1119  FuseActivationIntoPreviousLayerTest < DWConvolution2dTest < DataType::QAsymmS8 > , DataType::QAsymmS8 >
1120  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1121 }
1122 TEST_CASE("FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest")
1123 {
1124  ActivationDescriptor activationDescriptor;
1125  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1126  activationDescriptor.m_A = 6.0f;
1127  activationDescriptor.m_B = 0.0f;
1128 
1129  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
1130  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1131 }
1132 
1133 // TanH fused into Receiver Layers Float32
1134 TEST_CASE("FuseTanHIntoConvFloat32GpuAccTest")
1135 {
1136  ActivationDescriptor activationDescriptor;
1137  activationDescriptor.m_Function = ActivationFunction::TanH;
1138 
1139  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1140  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1141 }
1142 TEST_CASE("FuseTanHIntoMulFloat32GpuAccTest")
1143 {
1144  ActivationDescriptor activationDescriptor;
1145  activationDescriptor.m_Function = ActivationFunction::TanH;
1146 
1147  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1148  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1149 }
1150 TEST_CASE("FuseTanHIntoAddFloat32GpuAccTest")
1151 {
1152  ActivationDescriptor activationDescriptor;
1153  activationDescriptor.m_Function = ActivationFunction::TanH;
1154 
1155  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1156  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1157 }
1158 TEST_CASE("FuseTanHIntoSubFloat32GpuAccTest")
1159 {
1160  ActivationDescriptor activationDescriptor;
1161  activationDescriptor.m_Function = ActivationFunction::TanH;
1162 
1163  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1164  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1165 }
1166 TEST_CASE("FuseTanHIntoDivFloat32GpuAccTest")
1167 {
1168  ActivationDescriptor activationDescriptor;
1169  activationDescriptor.m_Function = ActivationFunction::TanH;
1170 
1171  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1172  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1173 }
1174 
1175 // HardSwish fused into Receiver Layers Float32
1176 TEST_CASE("FuseHardSwishIntoConvFloat32GpuAccTest")
1177 {
1178  ActivationDescriptor activationDescriptor;
1179  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1180 
1181  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1182  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1183 }
1184 TEST_CASE("FuseHardSwishIntoMulFloat32GpuAccTest")
1185 {
1186  ActivationDescriptor activationDescriptor;
1187  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1188 
1189  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1190  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1191 }
1192 TEST_CASE("FuseHardSwishIntoAddFloat32GpuAccTest")
1193 {
1194  ActivationDescriptor activationDescriptor;
1195  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1196 
1197  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
1198  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1199 }
1200 TEST_CASE("FuseHardSwishIntoSubFloat32GpuAccTest")
1201 {
1202  ActivationDescriptor activationDescriptor;
1203  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1204 
1205  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1206  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1207 }
1208 TEST_CASE("FuseHardSwishIntoDivFloat32GpuAccTest")
1209 {
1210  ActivationDescriptor activationDescriptor;
1211  activationDescriptor.m_Function = ActivationFunction::HardSwish;
1212 
1213  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
1214  (activationDescriptor, 0.0001f, Compute::GpuAcc);
1215 }
1216 
1217 // Test that all receiver layers follow by all activation layers work, either fused or not fused
1218 TEST_CASE("LayerFollowedByActivationFloat32GpuAccTest")
1219 {
1220  ActivationDescriptor activationDescriptor;
1221  for (int i = 0; i != 12; ++i)
1222  {
1223  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1224  activationDescriptor.m_A = 1.0f;
1225  activationDescriptor.m_B = -1.0f;
1226  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1227  {
1228  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
1229  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1230  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
1231  (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
1232  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
1233  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1234  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
1235  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1236  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
1237  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1238  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
1239  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1240  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
1241  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1242  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
1243  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1244  }
1245  }
1246 }
1247 TEST_CASE("LayerFollowedByActivationFloat16GpuAccTest")
1248 {
1249  ActivationDescriptor activationDescriptor;
1250  for (int i = 0; i != 12; ++i)
1251  {
1252  activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
1253  activationDescriptor.m_A = 1.0f;
1254  activationDescriptor.m_B = -1.0f;
1255  if (activationDescriptor.m_Function != ActivationFunction::Elu)
1256  {
1257  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
1258  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
1259  CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
1260  (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
1261  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
1262  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
1263  CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
1264  (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
1265  CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
1266  (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
1267  CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
1268  (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
1269  CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
1270  (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
1271  CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
1272  (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
1273  }
1274  }
1275 }
1276 TEST_CASE("LayerFollowedByActivationQAsymmU8GpuAccTest")
1277 {
1278  ActivationDescriptor activationDescriptor;
1279 
1280  activationDescriptor.m_Function = ActivationFunction::Sigmoid;
1281  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1282  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
1283  static_cast<int>(activationDescriptor.m_Function));
1284  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1285  (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
1286  static_cast<int>(activationDescriptor.m_Function));
1287 
1288  activationDescriptor.m_Function = ActivationFunction::TanH;
1289  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1290  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
1291  static_cast<int>(activationDescriptor.m_Function));
1292  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1293  (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
1294  static_cast<int>(activationDescriptor.m_Function));
1295 
1296  activationDescriptor.m_Function = ActivationFunction::ReLu;
1297  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1298  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1299  static_cast<int>(activationDescriptor.m_Function));
1300  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1301  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1302  static_cast<int>(activationDescriptor.m_Function));
1303 
1304  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
1305  activationDescriptor.m_A = 1.0f;
1306  activationDescriptor.m_B = -1.0f;
1307  CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1308  (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
1309  static_cast<int>(activationDescriptor.m_Function));
1310  CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
1311  (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
1312  static_cast<int>(activationDescriptor.m_Function));
1313 
    activationDescriptor.m_Function = ActivationFunction::HardSwish;
    CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
                   (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
                   static_cast<int>(activationDescriptor.m_Function));
    CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
                   (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
                   static_cast<int>(activationDescriptor.m_Function));
}
}
#endif
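
The cases above delegate to the FuseActivationSimpleTest helper defined earlier in this file. As a rough, minimal sketch of the graph-level part of such a check (not part of the original source, simplified relative to the real helper, which performs additional validation such as comparing the outputs of the fused and unfused networks), a single fusion assertion could look like this:

    // Hedged sketch of a fusion check: build receiver -> activation, optimize for
    // GpuAcc, then assert the activation was folded into the receiver layer.
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer  = network->AddInputLayer(0);
    IConnectableLayer* receiver    =
        Convolution2dTest<DataType::Float32>::AddReceiverLayer(network.get(), "receiver");
    ActivationDescriptor reluDescriptor;
    reluDescriptor.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* activation  = network->AddActivationLayer(reluDescriptor, "activation");
    IConnectableLayer* outputLayer = network->AddOutputLayer(0);

    inputLayer->GetOutputSlot(0).Connect(receiver->GetInputSlot(0));
    receiver->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    inputLayer->GetOutputSlot(0).SetTensorInfo(
        TensorInfo(Convolution2dTest<DataType::Float32>::GetInputShape(), DataType::Float32));
    receiver->GetOutputSlot(0).SetTensorInfo(
        TensorInfo(Convolution2dTest<DataType::Float32>::GetOutputShape(), DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(
        TensorInfo(Convolution2dTest<DataType::Float32>::GetOutputShape(), DataType::Float32));

    // After optimization the activation should be gone, leaving
    // Input -> Convolution2d -> Output in the optimized graph.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime = IRuntime::Create(options);
    IOptimizedNetworkPtr optNet = Optimize(*network, { Compute::GpuAcc }, runtime->GetDeviceSpec());
    Graph& graph = GetGraphForTesting(optNet.get());
    CHECK(CheckSequence(graph.cbegin(), graph.cend(),
                        &IsLayerOfType<InputLayer>,
                        &IsLayerOfType<Convolution2dLayer>,
                        &IsLayerOfType<OutputLayer>));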