ArmNN
 20.11
FuseActivationTests.cpp
Go to the documentation of this file.
1 //
2 // Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "LayersFwd.hpp"
7 
8 #include <Network.hpp>
9 #include <ResolveType.hpp>
10 #include <armnn/INetwork.hpp>
11 #include <test/TestUtils.hpp>
12 
13 #include <boost/test/unit_test.hpp>
14 
15 #include <QuantizeHelper.hpp>
16 #include <string>
17 
18 using namespace armnn;
19 
21 
22 namespace
23 {
// Quantization parameters shared by every quantized tensor in these tests.
// Scale 1 and offset 0 make quantized values numerically equal to the float
// values, which keeps fused/unfused comparisons easy to reason about.
const float g_qScale = 1.0f;
const int32_t g_qOffset = 0;
26 
/// Builds a vector of 'size' elements forming the arithmetic progression
/// initial, initial + increment, initial + 2*increment, ... converted to T.
///
/// Fixes over the original:
/// - removes the unused local std::vector<float>;
/// - drops the 'size > 1' guard, which left a single-element vector
///   value-initialized (T{}) instead of containing 'initial'.
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<T> vector(size);
    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}
42 
43 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
44 struct Convolution2dTest
45 {
47  static std::string GetReceiverLayerName() { return "Convolution2d"; };
48  static const bool isElementWise = false;
49 
50  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
51  static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 4}); } // NHWCout
52  static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); } // CoutHWCin
53 
54  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
55  constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut
56 
57  static IConnectableLayer* AddReceiverLayer(INetwork* network,
58  const char* name)
59  {
60  Convolution2dDescriptor descriptor;
61  descriptor.m_BiasEnabled = false;
62  descriptor.m_DataLayout = DataLayout::NHWC;
63  descriptor.m_StrideX = 1;
64  descriptor.m_StrideY = 1;
65 
66  std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
67  11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
68  21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
69  31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
70  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
71  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
72  ConstTensor weights(weightsInfo, weightsVector);
73  Optional<ConstTensor> optionalBias;
74 
75  return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
76  }
77 };
78 
79 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
81 {
82 public:
84  static std::string GetReceiverLayerName() { return "DepthwiseConvolution2d"; };
85  static const bool isElementWise = false;
86 
87  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
88  static TensorShape GetOutputShape() { return TensorShape( {1, 3, 3, 12}); } // NHWCout
89  static TensorShape GetWeightsShape() { return TensorShape( {4, 3, 2, 2}); } // MCinHW
90 
91  constexpr static const unsigned int inputSize = 48; //batchIn * heightIn * widthIn * channelIn;
92  constexpr static const unsigned int outputSize = 108; //batchOut * heightOut * widthOut * channelOut;
93 
94  static IConnectableLayer* AddReceiverLayer(INetwork* network,
95  const char* name)
96  {
98  descriptor.m_BiasEnabled = false;
99  descriptor.m_DataLayout = DataLayout::NHWC;
100  descriptor.m_StrideX = 1;
101  descriptor.m_StrideY = 1;
102 
103  std::vector<float> weightsData = { 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12,
104  11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
105  21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
106  31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
107  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
108  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
109  ConstTensor weights(weightsInfo, weightsVector);
110  Optional<ConstTensor> optionalBias;
111 
112  return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
113  }
114 };
115 
116 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
117 struct FullyConnectedTest
118 {
119 public:
121  static std::string GetReceiverLayerName() { return "FullyConnected"; };
122  static const bool isElementWise = false;
123 
124  static TensorShape GetInputShape() { return TensorShape( {2, 5, 1, 1}); } // NCinHW
125  static TensorShape GetOutputShape() { return TensorShape( {2, 3}); } // NCout
126  static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); } // CinCout
127 
128  constexpr static const unsigned int inputSize = 10; // batchIn * heightIn * widthIn * channelIn
129  constexpr static const unsigned int outputSize = 6; // batchOut * heightOut * widthOut * channelOut
130 
131  static IConnectableLayer* AddReceiverLayer(INetwork* network,
132  const char* name)
133  {
134  FullyConnectedDescriptor descriptor;
135  descriptor.m_BiasEnabled = false;
136 
137  std::vector<float> weightsData = { 1, 2, 3, 4, 5,
138  6, 7, 8, 9, 10,
139  11, 12, 13, 14, 15};
140  std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, g_qScale, g_qOffset);
141  TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, g_qScale, g_qOffset);
142  ConstTensor weights(weightsInfo, weightsVector);
143  Optional<ConstTensor> optionalBias;
144 
145  return network->AddFullyConnectedLayer(descriptor, weights, optionalBias, name);
146  }
147 };
148 
149 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
150 struct BatchNormTest
151 {
152 public:
154  static std::string GetReceiverLayerName() { return "BatchNorm"; };
155  static const bool isElementWise = false;
156 
157  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
158  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
159 
160  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
161  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
162 
163  static IConnectableLayer* AddReceiverLayer(INetwork* network,
164  const char* name)
165  {
166  BatchNormalizationDescriptor descriptor;
167  descriptor.m_DataLayout = DataLayout::NHWC;
168 
169  std::vector<T> betaVector = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
170  std::vector<T> gammaVector = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
171  std::vector<T> meanVector = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
172  std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);
173 
174  const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
175  ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
176  ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
177  ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
178  ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);
179 
180  return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
181  }
182 };
183 
184 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
185 struct MultiplicationTest
186 {
188  static std::string GetReceiverLayerName() { return "Multiplication"; };
189  static const bool isElementWise = true;
190 
191  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
192  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
193 
194  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
195  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
196 
197  static IConnectableLayer* AddReceiverLayer(INetwork* network,
198  const char* name)
199  {
200  return network->AddMultiplicationLayer(name);
201  }
202 };
203 
204 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
205 struct AdditionTest
206 {
208  static std::string GetReceiverLayerName() { return "Addition"; };
209  static const bool isElementWise = true;
210 
211  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
212  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
213 
214  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
215  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
216 
217  static IConnectableLayer* AddReceiverLayer(INetwork* network,
218  const char* name)
219  {
220  return network->AddAdditionLayer(name);
221  }
222 };
223 
224 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
225 struct SubtractionTest
226 {
228  static std::string GetReceiverLayerName() { return "Subtraction"; };
229  static const bool isElementWise = true;
230 
231  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
232  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
233 
234  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
235  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
236 
237  static IConnectableLayer* AddReceiverLayer(INetwork* network,
238  const char* name)
239  {
240  return network->AddSubtractionLayer(name);
241  }
242 };
243 
244 template<armnn::DataType ArmnnType, typename T = armnn::ResolveType<ArmnnType>>
245 struct DivisionTest
246 {
248  static std::string GetReceiverLayerName() { return "Division"; };
249  static const bool isElementWise = true;
250 
251  static TensorShape GetInputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCin
252  static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); } // NHWCout
253 
254  constexpr static const unsigned int inputSize = 48; // batchIn * heightIn * widthIn * channelIn
255  constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut
256 
257  static IConnectableLayer* AddReceiverLayer(INetwork* network,
258  const char* name)
259  {
260  return network->AddDivisionLayer(name);
261  }
262 };
263 
264 } // namespace
265 
266 template<typename LayerTest,
267  armnn::DataType ArmnnType>
268 INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing)
269 {
270  // Create a network
271  INetworkPtr network = INetwork::Create();
272 
273  IConnectableLayer* inputLayer = network->AddInputLayer(0);
274 
275  IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
276  "receiverLayer");
277 
278  IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
279  "activation");
280 
281  IConnectableLayer* outputLayer = network->AddOutputLayer(0);
282  IConnectableLayer* output2Layer = preventFusing?network->AddOutputLayer(1):nullptr;
283 
284  // Define layers information
285  TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, g_qScale, g_qOffset);
286  TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, g_qScale, g_qOffset);
287 
288  // Set layer information
289  inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
290  receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
291  activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
292 
293  // Connect layers
294  inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
295  receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
296  activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));
297 
298  if (LayerTest::isElementWise)
299  {
300  inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
301  }
302  if (preventFusing)
303  {
304  receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
305  }
306 
307  return network;
308 }
309 
310 template<typename LayerTest,
311  armnn::DataType ArmnnType,
312  typename LayerType = typename LayerTest::LayerType,
313  typename T = armnn::ResolveType<ArmnnType>>
315 backendId)
316 {
317  // FIRST NETWORK: Fused
318  // Construct ArmNN network
319  INetworkPtr networkFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, false);
320 
321  // Create ArmNN runtime
322  IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options
323 
324  // Optimise ArmNN network
325  IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());
326 
327  Graph graphFused = PolymorphicDowncast<OptimizedNetwork*>(optNetFused.get())->GetGraph();
328 
329  auto checkFusedConv2d = [](const armnn::Layer* const layer)->bool {
330  return IsLayerOfType<LayerType>(layer) &&
331  (layer->GetNameStr() == "fused-activation-into-receiverLayer");
332  };
333 
334  BOOST_CHECK_MESSAGE(3 == graphFused.GetNumLayers(), LayerTest::GetReceiverLayerName());
335  BOOST_TEST(CheckSequence(graphFused.cbegin(),
336  graphFused.cend(),
337  &IsLayerOfType<InputLayer>,
338  checkFusedConv2d,
339  &IsLayerOfType<OutputLayer>));
340 
341  // Load network into runtime
342  NetworkId networkIdentifier;
343  BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);
344 
345  //Creates structures for inputs and outputs.
346  std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
347  std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
348  std::vector<T> outputDataFused(LayerTest::outputSize);
349 
350  InputTensors inputTensorsFused{
351  {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
352  OutputTensors outputTensorsFused{
353  {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};
354 
355  // Execute network
356  run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);
357 
358  // SECOND NETWORK: NotFused
359  // Construct ArmNN network
360  INetworkPtr networkNotFused = CreatNetwork<LayerTest, ArmnnType>(activationDescriptor, true);
361 
362  // Create ArmNN runtime
363  IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options
364 
365  // Optimise ArmNN network
366  IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());
367 
368  Graph graphNotFused = PolymorphicDowncast<OptimizedNetwork*>(optNetNotFused.get())->GetGraph();
369 
370  BOOST_CHECK(5 == graphNotFused.GetNumLayers());
371  BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
372  graphNotFused.cend(),
373  &IsLayerOfType<armnn::InputLayer>,
374  &IsLayerOfType<LayerType>,
375  &IsLayerOfType<armnn::ActivationLayer>,
376  &IsLayerOfType<armnn::OutputLayer>,
377  &IsLayerOfType<armnn::OutputLayer>));
378 
379  // Load network into runtime
380  NetworkId networkIdentifierNotFused;
381  BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);
382 
383  //Creates structures for inputs and outputs.
384  std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, g_qScale, g_qOffset);
385  std::vector<T> outputDataNotFused(LayerTest::outputSize);
386  std::vector<T> outputData2NotFused(LayerTest::outputSize);
387 
388  InputTensors inputTensorsNotFused{
389  {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
390  OutputTensors outputTensorsNotFused{
391  {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
392  {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};
393 
394  // Execute network
395  runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused);
396 
397  // Check the output of the fused-activation matches with the output of the activation in the "NotFused" network
398  for (unsigned int n = 0; n < outputDataFused.size(); ++n)
399  {
400  BOOST_CHECK_CLOSE(static_cast<float>(outputDataFused[n]), static_cast<float>(outputDataNotFused[n]),
401  T(tolerance));
402  }
403 }
404 
405 #if defined(ARMCOMPUTENEON_ENABLED)
406 // ReLu fused into Receiver Layers Float32
407 BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32CpuAccTest)
408 {
409  ActivationDescriptor activationDescriptor;
410  activationDescriptor.m_Function = ActivationFunction::ReLu;
411 
412  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
413  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
414 }
415 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32CpuAccTest)
416 {
417  ActivationDescriptor activationDescriptor;
418  activationDescriptor.m_Function = ActivationFunction::ReLu;
419 
420  FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
421  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
422 }
423 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32CpuAccTest)
424 {
425  ActivationDescriptor activationDescriptor;
426  activationDescriptor.m_Function = ActivationFunction::ReLu;
427 
428  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
429  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
430 }
431 BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32CpuAccTest)
432 {
433  ActivationDescriptor activationDescriptor;
434  activationDescriptor.m_Function = ActivationFunction::ReLu;
435 
436  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
437  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
438 }
439 
440 // BoundedReLu fused into Receiver Layers Float32
441 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32CpuAccTest)
442 {
443  ActivationDescriptor activationDescriptor;
444  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
445  activationDescriptor.m_A = 1.0f;
446  activationDescriptor.m_B = -1.0f;
447 
448  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
449  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
450 }
451 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32CpuAccTest)
452 {
453  ActivationDescriptor activationDescriptor;
454  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
455  activationDescriptor.m_A = 1.0f;
456  activationDescriptor.m_B = -1.0f;
457 
458  FuseActivationIntoPreviousLayerTest < DepthwiseConvolution2dTest < DataType::Float32 > , DataType::Float32 >
459  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
460 }
461 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest)
462 {
463  ActivationDescriptor activationDescriptor;
464  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
465  activationDescriptor.m_A = 1.0f;
466  activationDescriptor.m_B = -1.0f;
467 
468  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
469  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
470 }
471 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32CpuAccTest)
472 {
473  ActivationDescriptor activationDescriptor;
474  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
475  activationDescriptor.m_A = 1.0f;
476  activationDescriptor.m_B = -1.0f;
477 
478  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
479  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
480 }
481 
482 // ReLU fused into Receiver Layers QAsymmU8
483 BOOST_AUTO_TEST_CASE(FuseReLUIntoConvQAsymmU8CpuAccTest)
484 {
485  ActivationDescriptor activationDescriptor;
486  activationDescriptor.m_Function = ActivationFunction::ReLu;
487 
488  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
489  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
490 }
491 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvQAsymmU8CpuAccTest)
492 {
493  ActivationDescriptor activationDescriptor;
494  activationDescriptor.m_Function = ActivationFunction::ReLu;
495 
496  FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
497  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
498 }
499 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest)
500 {
501  ActivationDescriptor activationDescriptor;
502  activationDescriptor.m_Function = ActivationFunction::ReLu;
503 
504  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
505  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
506 }
507 
508 // HardSwish fused into Receiver Layers Float32
509 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32CpuAccTest)
510 {
511  ActivationDescriptor activationDescriptor;
512  activationDescriptor.m_Function = ActivationFunction::HardSwish;
513 
514  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
515  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
516 }
517 
518 // TanH fused into Receiver Layers Float32
519 BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32CpuAccTest)
520 {
521  ActivationDescriptor activationDescriptor;
522  activationDescriptor.m_Function = ActivationFunction::TanH;
523 
524  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
525  (activationDescriptor, 0.0001f, armnn::Compute::CpuAcc);
526 }
527 #endif
528 
529 #if defined(ARMCOMPUTECL_ENABLED)
530 // ReLu fused into Receiver Layers Float32
531 BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32GpuAccTest)
532 {
533  ActivationDescriptor activationDescriptor;
534  activationDescriptor.m_Function = ActivationFunction::ReLu;
535 
536  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
537  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
538 }
539 BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32GpuAccTest)
540 {
541  ActivationDescriptor activationDescriptor;
542  activationDescriptor.m_Function = ActivationFunction::ReLu;
543 
544  FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
545  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
546 }
547 BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32GpuAccTest)
548 {
549  ActivationDescriptor activationDescriptor;
550  activationDescriptor.m_Function = ActivationFunction::ReLu;
551 
552  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
553  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
554 }
555 BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32GpuAccTest)
556 {
557  ActivationDescriptor activationDescriptor;
558  activationDescriptor.m_Function = ActivationFunction::ReLu;
559 
560  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
561  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
562 }
563 BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat32GpuAccTest)
564 {
565  ActivationDescriptor activationDescriptor;
566  activationDescriptor.m_Function = ActivationFunction::ReLu;
567 
568  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
569  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
570 }
571 BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat32GpuAccTest)
572 {
573  ActivationDescriptor activationDescriptor;
574  activationDescriptor.m_Function = ActivationFunction::ReLu;
575 
576  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
577  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
578 }
579 BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat32GpuAccTest)
580 {
581  ActivationDescriptor activationDescriptor;
582  activationDescriptor.m_Function = ActivationFunction::ReLu;
583 
584  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
585  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
586 }
587 BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat32GpuAccTest)
588 {
589  ActivationDescriptor activationDescriptor;
590  activationDescriptor.m_Function = ActivationFunction::ReLu;
591 
592  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
593  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
594 }
595 
596 // BoundedReLu fused into Receiver Layers Float32
597 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32GpuAccTest)
598 {
599  ActivationDescriptor activationDescriptor;
600  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
601  activationDescriptor.m_A = 1.0f;
602  activationDescriptor.m_B = -1.0f;
603 
604  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
605  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
606 }
607 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32GpuAccTest)
608 {
609  ActivationDescriptor activationDescriptor;
610  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
611  activationDescriptor.m_A = 1.0f;
612  activationDescriptor.m_B = -1.0f;
613 
614  FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::Float32>, DataType::Float32>
615  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
616 }
617 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest)
618 {
619  ActivationDescriptor activationDescriptor;
620  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
621  activationDescriptor.m_A = 1.0f;
622  activationDescriptor.m_B = -1.0f;
623 
624  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
625  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
626 }
627 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32GpuAccTest)
628 {
629  ActivationDescriptor activationDescriptor;
630  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
631  activationDescriptor.m_A = 1.0f;
632  activationDescriptor.m_B = -1.0f;
633 
634  FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
635  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
636 }
637 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoMulFloat32GpuAccTest)
638 {
639  ActivationDescriptor activationDescriptor;
640  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
641  activationDescriptor.m_A = 1.0f;
642  activationDescriptor.m_B = -1.0f;
643 
644  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
645  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
646 }
647 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoAddFloat32GpuAccTest)
648 {
649  ActivationDescriptor activationDescriptor;
650  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
651  activationDescriptor.m_A = 1.0f;
652  activationDescriptor.m_B = -1.0f;
653 
654  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
655  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
656 }
657 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoSubFloat32GpuAccTest)
658 {
659  ActivationDescriptor activationDescriptor;
660  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
661  activationDescriptor.m_A = 1.0f;
662  activationDescriptor.m_B = -1.0f;
663 
664  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
665  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
666 }
667 BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDivFloat32GpuAccTest)
668 {
669  ActivationDescriptor activationDescriptor;
670  activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
671  activationDescriptor.m_A = 1.0f;
672  activationDescriptor.m_B = -1.0f;
673 
674  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
675  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
676 }
677 
678 // ReLU fused into Receiver Layers QAsymmU8
679 BOOST_AUTO_TEST_CASE(FuseReLUQIntoConvAsymmU8GpuAccTest)
680 {
681  ActivationDescriptor activationDescriptor;
682  activationDescriptor.m_Function = ActivationFunction::ReLu;
683 
684  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
685  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
686 }
687 BOOST_AUTO_TEST_CASE(FuseReLUQIntoDWConvAsymmU8GpuAccTest)
688 {
689  ActivationDescriptor activationDescriptor;
690  activationDescriptor.m_Function = ActivationFunction::ReLu;
691 
692  FuseActivationIntoPreviousLayerTest<DepthwiseConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
693  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
694 }
695 BOOST_AUTO_TEST_CASE(FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest)
696 {
697  ActivationDescriptor activationDescriptor;
698  activationDescriptor.m_Function = ActivationFunction::ReLu;
699 
700  FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
701  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
702 }
703 
704 // HardSwish fused into Receiver Layers Float32
705 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32GpuAccTest)
706 {
707  ActivationDescriptor activationDescriptor;
708  activationDescriptor.m_Function = ActivationFunction::HardSwish;
709 
710  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
711  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
712 }
713 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoMulFloat32GpuAccTest)
714 {
715  ActivationDescriptor activationDescriptor;
716  activationDescriptor.m_Function = ActivationFunction::HardSwish;
717 
718  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
719  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
720 }
721 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoAddFloat32GpuAccTest)
722 {
723  ActivationDescriptor activationDescriptor;
724  activationDescriptor.m_Function = ActivationFunction::HardSwish;
725 
726  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
727  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
728 }
729 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoSubFloat32GpuAccTest)
730 {
731  ActivationDescriptor activationDescriptor;
732  activationDescriptor.m_Function = ActivationFunction::HardSwish;
733 
734  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
735  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
736 }
737 BOOST_AUTO_TEST_CASE(FuseHardSwishIntoDivFloat32GpuAccTest)
738 {
739  ActivationDescriptor activationDescriptor;
740  activationDescriptor.m_Function = ActivationFunction::HardSwish;
741 
742  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
743  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
744 }
745 
746 // TanH fused into Receiver Layers Float32
747 BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32GpuAccTest)
748 {
749  ActivationDescriptor activationDescriptor;
750  activationDescriptor.m_Function = ActivationFunction::TanH;
751 
752  FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
753  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
754 }
755 BOOST_AUTO_TEST_CASE(FuseTanHIntoMulFloat32GpuAccTest)
756 {
757  ActivationDescriptor activationDescriptor;
758  activationDescriptor.m_Function = ActivationFunction::TanH;
759 
760  FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
761  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
762 }
763 BOOST_AUTO_TEST_CASE(FuseTanHIntoAddFloat32GpuAccTest)
764 {
765  ActivationDescriptor activationDescriptor;
766  activationDescriptor.m_Function = ActivationFunction::TanH;
767 
768  FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
769  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
770 }
771 BOOST_AUTO_TEST_CASE(FuseTanHIntoSubFloat32GpuAccTest)
772 {
773  ActivationDescriptor activationDescriptor;
774  activationDescriptor.m_Function = ActivationFunction::TanH;
775 
776  FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
777  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
778 }
779 BOOST_AUTO_TEST_CASE(FuseTanHIntoDivFloat32GpuAccTest)
780 {
781  ActivationDescriptor activationDescriptor;
782  activationDescriptor.m_Function = ActivationFunction::TanH;
783 
784  FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
785  (activationDescriptor, 0.0001f, armnn::Compute::GpuAcc);
786 }
787 #endif
788 
BOOST_AUTO_TEST_SUITE(TensorflowLiteParser)
bool m_BiasEnabled
Enable/disable bias.
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:32
This layer represents a batch normalization operation.
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:61
bool m_BiasEnabled
Enable/disable bias.
LayerTestResult< T, 2 > FullyConnectedTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled)
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
This layer represents a depthwise convolution 2d operation.
A Convolution2dDescriptor for the Convolution2dLayer.
LayerTestResult< float, 4 > DivisionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:25
DataLayout m_DataLayout
The data layout to be used (NCHW, NHWC).
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:73
Main network class which provides the interface for building up a neural network. ...
Definition: INetwork.hpp:105
virtual IConnectableLayer * AddBatchNormalizationLayer(const BatchNormalizationDescriptor &desc, const ConstTensor &mean, const ConstTensor &variance, const ConstTensor &beta, const ConstTensor &gamma, const char *name=nullptr)=0
Adds a batch normalization layer to the network.
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:340
INetworkPtr CreatNetwork(ActivationDescriptor activationDescriptor, bool preventFusing)
int NetworkId
Definition: IRuntime.hpp:20
Copyright (c) 2020 ARM Limited.
LayerTestResult< float, 4 > AdditionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
virtual IConnectableLayer * AddFullyConnectedLayer(const FullyConnectedDescriptor &fullyConnectedDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)=0
Adds a fully connected layer to the network.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
virtual IConnectableLayer * AddConvolution2dLayer(const Convolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)=0
Adds a 2D convolution layer to the network.
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:306
virtual IConnectableLayer * AddAdditionLayer(const char *name=nullptr)=0
Adds an addition layer to the network.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
uint32_t m_StrideX
Stride value when proceeding through input for the width dimension.
Compute
The Compute enum is now deprecated and it is now being replaced by BackendId.
Definition: BackendId.hpp:21
DataType
Definition: Types.hpp:32
This layer represents a fully connected operation.
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1011
A FullyConnectedDescriptor for the FullyConnectedLayer.
bool m_BiasEnabled
Enable/disable bias.
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:314
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:341
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:600
GPU Execution: OpenCL: ArmCompute.
BOOST_AUTO_TEST_CASE(CheckConvolution2dLayer)
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:20
min(a, max(b, input)) ReLu1 & ReLu6.
virtual IConnectableLayer * AddDepthwiseConvolution2dLayer(const DepthwiseConvolution2dDescriptor &convolution2dDescriptor, const ConstTensor &weights, const Optional< ConstTensor > &biases, const char *name=nullptr)=0
Adds a 2D depthwise convolution layer to the network.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, armnn::Compute backendId)
This layer represents an addition operation.
float m_A
Alpha upper bound value used by the activation functions. (BoundedReLu, Linear, TanH, Elu).
Definition: Descriptors.hpp:45
BOOST_AUTO_TEST_SUITE_END()
This layer represents a subtraction operation.
uint32_t m_StrideY
Stride value when proceeding through input for the height dimension.
LayerTestResult< float, 4 > SubtractionTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
CPU Execution: NEON: ArmCompute.
bool CheckSequence(const armnn::Graph::ConstIterator first, const armnn::Graph::ConstIterator last)
Definition: TestUtils.hpp:21
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
This layer represents a division operation.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
This layer represents a convolution 2d operation.
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:101
virtual int Connect(IInputSlot &destination)=0
This layer represents a multiplication operation.
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:46
virtual IConnectableLayer * AddDivisionLayer(const char *name=nullptr)=0
Adds a division layer to the network.
float m_B
Beta lower bound value used by the activation functions. (BoundedReLu, Linear, TanH).
Definition: Descriptors.hpp:47
LayerTestResult< float, 4 > MultiplicationTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory)
LayerTestResult< float, 4 > DepthwiseConvolution2dTest(armnn::IWorkloadFactory &workloadFactory, const armnn::IBackendInternal::IMemoryManagerSharedPtr &memoryManager, const armnn::ITensorHandleFactory &tensorHandleFactory, bool biasEnabled, const armnn::DataLayout layout)
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:43
A DepthwiseConvolution2dDescriptor for the DepthwiseConvolution2dLayer.
A BatchNormalizationDescriptor for the BatchNormalizationLayer.
virtual IConnectableLayer * AddMultiplicationLayer(const char *name=nullptr)=0
Adds a multiplication layer to the network.
virtual IConnectableLayer * AddSubtractionLayer(const char *name=nullptr)=0
Adds a subtraction layer to the network.