ArmNN 21.02
FuseActivationTests.cpp
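Unit tests for the ArmNN optimizer's activation-fusion pass: each test builds a small network in which an Activation layer follows a "receiver" layer (convolution, depthwise convolution, fully connected, batch normalization, or an element-wise arithmetic layer), optimizes it for the CpuAcc (NEON) or GpuAcc (OpenCL) backend, and checks both that the activation was fused into the receiver layer and that the fused network produces the same results as an unfused reference.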
//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "LayersFwd.hpp"

#include <Network.hpp>
#include <ResolveType.hpp>
#include <armnn/INetwork.hpp>
#include <test/TestUtils.hpp>

#include <boost/test/unit_test.hpp>

#include <QuantizeHelper.hpp>
#include <string>

using namespace armnn;

BOOST_AUTO_TEST_SUITE(Optimizer)

namespace armnn
{

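// Builds a vector of 'size' values starting at 'initial' and stepping by 'increment',
// converted to the requested element type T.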
template<typename T>
std::vector<T> GetVector(unsigned int size, float initial, float increment)
{
    std::vector<T> vector(size);

    for (unsigned int i = 0; i < size; ++i)
    {
        vector[i] = T(initial + (increment * static_cast<float>(i)));
    }
    return vector;
}

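// Each *Test struct below describes one receiver-layer type for the fusion tests: the
// tensor shapes involved, whether the layer is element-wise (and therefore needs its
// second input slot connected), and a factory that adds the layer to a network.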
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct Convolution2dTest
{
    using LayerType = Convolution2dLayer;
    static const bool isElementWise = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 4}); }  // NHWCout
    static TensorShape GetWeightsShape() { return TensorShape( {4, 2, 2, 3}); }  // CoutHWCin

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 36; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        Convolution2dDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;
        descriptor.m_StrideX    = 1;
        descriptor.m_StrideY    = 1;

        std::vector<float> weightsData = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                          31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddConvolution2dLayer(descriptor, weights, optionalBias, name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DWConvolution2dTest
{
public:
    using LayerType = DepthwiseConvolution2dLayer;
    static const bool isElementWise = false;

    static TensorShape GetInputShape()   { return TensorShape( {1, 4, 4, 3}); }   // NHWCin
    static TensorShape GetOutputShape()  { return TensorShape( {1, 3, 3, 12}); }  // NHWCout
    static TensorShape GetWeightsShape() { return TensorShape( {4, 3, 2, 2}); }   // MCinHW

    constexpr static const unsigned int inputSize  = 48;  // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 108; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        DepthwiseConvolution2dDescriptor descriptor;
        descriptor.m_BiasEnabled = false;
        descriptor.m_DataLayout  = DataLayout::NHWC;
        descriptor.m_StrideX     = 1;
        descriptor.m_StrideY     = 1;

        std::vector<float> weightsData = { 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
                                          11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22,
                                          21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
                                          31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddDepthwiseConvolution2dLayer(descriptor, weights, optionalBias, name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct FullyConnectedTest
{
public:
    using LayerType = FullyConnectedLayer;
    static const bool isElementWise = false;

    static TensorShape GetInputShape()   { return TensorShape( {2, 5, 1, 1}); }  // NCinHW
    static TensorShape GetOutputShape()  { return TensorShape( {2, 3}); }        // NCout
    static TensorShape GetWeightsShape() { return TensorShape( {5, 3}); }        // CinCout

    constexpr static const unsigned int inputSize  = 10; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 6;  // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        FullyConnectedDescriptor descriptor;
        descriptor.m_BiasEnabled = false;

        std::vector<float> weightsData = {  1,  2,  3,  4,  5,
                                            6,  7,  8,  9, 10,
                                           11, 12, 13, 14, 15};
        std::vector<T> weightsVector = armnnUtils::QuantizedVector<T>(weightsData, scale, offset);
        TensorInfo weightsInfo(GetWeightsShape(), ArmnnType, scale, offset);
        ConstTensor weights(weightsInfo, weightsVector);
        Optional<ConstTensor> optionalBias;

        return network->AddFullyConnectedLayer(descriptor, weights, optionalBias, name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct BatchNormTest
{
public:
    using LayerType = BatchNormalizationLayer;
    static const bool isElementWise = false;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); }  // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        BatchNormalizationDescriptor descriptor;
        descriptor.m_DataLayout = DataLayout::NHWC;

        std::vector<T> betaVector     = GetVector<T>(GetOutputShape()[3], 0.0f, 0.2f);
        std::vector<T> gammaVector    = GetVector<T>(GetOutputShape()[3], 0.5f, 0.1f);
        std::vector<T> meanVector     = GetVector<T>(GetOutputShape()[3], 0.1f, 0.1f);
        std::vector<T> varianceVector = GetVector<T>(GetOutputShape()[3], 1.0f, 0.1f);

        const unsigned int outputChannelSize[] = { GetOutputShape()[3] };
        ConstTensor beta(TensorInfo(1, outputChannelSize, ArmnnType), betaVector);
        ConstTensor gamma(TensorInfo(1, outputChannelSize, ArmnnType), gammaVector);
        ConstTensor mean(TensorInfo(1, outputChannelSize, ArmnnType), meanVector);
        ConstTensor variance(TensorInfo(1, outputChannelSize, ArmnnType), varianceVector);

        return network->AddBatchNormalizationLayer(descriptor, mean, variance, beta, gamma, name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct MultiplicationTest
{
    using LayerType = MultiplicationLayer;
    static const bool isElementWise = true;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); }  // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddMultiplicationLayer(name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct AdditionTest
{
    using LayerType = AdditionLayer;
    static const bool isElementWise = true;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); }  // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddAdditionLayer(name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct SubtractionTest
{
    using LayerType = SubtractionLayer;
    static const bool isElementWise = true;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); }  // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddSubtractionLayer(name);
    }
};

template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
struct DivisionTest
{
    using LayerType = DivisionLayer;
    static const bool isElementWise = true;

    static TensorShape GetInputShape()  { return TensorShape( {1, 4, 4, 3}); }  // NHWCin
    static TensorShape GetOutputShape() { return TensorShape( {1, 4, 4, 3}); }  // NHWCout

    constexpr static const unsigned int inputSize  = 48; // batchIn * heightIn * widthIn * channelIn
    constexpr static const unsigned int outputSize = 48; // batchOut * heightOut * widthOut * channelOut

    static IConnectableLayer* AddReceiverLayer(INetwork* network,
                                               const char* name,
                                               float scale = 1.f,
                                               int32_t offset = 0)
    {
        IgnoreUnused(scale);
        IgnoreUnused(offset);

        return network->AddDivisionLayer(name);
    }
};

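// Builds the test network: Input -> receiver layer -> Activation -> Output.
// When preventFusing is true, the receiver layer's output is additionally connected to
// a second Output layer, so the optimizer cannot fuse the activation into it (the
// intermediate result is still needed elsewhere).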
template<typename LayerTest,
         DataType ArmnnType>
INetworkPtr CreateNetwork(ActivationDescriptor activationDescriptor, bool preventFusing,
                          float scale, int32_t offset)
{
    // Create a network
    INetworkPtr network = INetwork::Create();

    IConnectableLayer* inputLayer = network->AddInputLayer(0);

    IConnectableLayer* receiverLayer = LayerTest::AddReceiverLayer(network.get(),
                                                                   "receiverLayer",
                                                                   scale,
                                                                   offset);

    IConnectableLayer* activationLayer = network->AddActivationLayer(activationDescriptor,
                                                                     "activation");

    IConnectableLayer* outputLayer  = network->AddOutputLayer(0);
    IConnectableLayer* output2Layer = preventFusing ? network->AddOutputLayer(1) : nullptr;

    // Define layer information
    TensorInfo inputInfo(LayerTest::GetInputShape(), ArmnnType, scale, offset);
    TensorInfo outputInfo(LayerTest::GetOutputShape(), ArmnnType, scale, offset);

    // Set layer information
    inputLayer->GetOutputSlot(0).SetTensorInfo(inputInfo);
    receiverLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);
    activationLayer->GetOutputSlot(0).SetTensorInfo(outputInfo);

    // Connect layers
    inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(0));
    receiverLayer->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
    activationLayer->GetOutputSlot(0).Connect(outputLayer->GetInputSlot(0));

    if (LayerTest::isElementWise)
    {
        inputLayer->GetOutputSlot(0).Connect(receiverLayer->GetInputSlot(1));
    }
    if (preventFusing)
    {
        receiverLayer->GetOutputSlot(0).Connect(output2Layer->GetInputSlot(0));
    }

    return network;
}

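// Builds and runs the network twice: once where fusion is possible and once where it is
// prevented, then checks that the optimized fused graph has the expected three-layer
// shape and that both networks produce matching outputs.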
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
void FuseActivationIntoPreviousLayerTest(ActivationDescriptor activationDescriptor, float tolerance, Compute backendId,
                                         float scale = 1.f, int32_t offset = 0)
{
    // FIRST NETWORK: Fused
    // Construct ArmNN network
    INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

    Graph& graphFused = GetGraphForTesting(optNetFused.get());

    auto checkFusedConv2d = [](const Layer* const layer)->bool {
        return IsLayerOfType<LayerType>(layer) &&
               (layer->GetNameStr() == "fused-activation-into-receiverLayer");
    };

    BOOST_CHECK(3 == graphFused.GetNumLayers());
    BOOST_TEST(CheckSequence(graphFused.cbegin(),
                             graphFused.cend(),
                             &IsLayerOfType<InputLayer>,
                             checkFusedConv2d,
                             &IsLayerOfType<OutputLayer>));

    // Load network into runtime
    NetworkId networkIdentifier;
    BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

    // Create structures for inputs and outputs
    std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
    std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataFused(LayerTest::outputSize);

    InputTensors inputTensorsFused{
        {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
    OutputTensors outputTensorsFused{
        {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

    // Execute network
    BOOST_TEST(run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused) == Status::Success);

    // SECOND NETWORK: NotFused
    // Construct ArmNN network
    INetworkPtr networkNotFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, true, scale, offset);

    // Create ArmNN runtime
    IRuntimePtr runNotFused = IRuntime::Create(IRuntime::CreationOptions()); // default options

    // Optimise ArmNN network
    IOptimizedNetworkPtr optNetNotFused = Optimize(*networkNotFused, {backendId}, runNotFused->GetDeviceSpec());

    Graph& graphNotFused = GetGraphForTesting(optNetNotFused.get());

    BOOST_CHECK(5 == graphNotFused.GetNumLayers());
    BOOST_TEST(CheckSequence(graphNotFused.cbegin(),
                             graphNotFused.cend(),
                             &IsLayerOfType<InputLayer>,
                             &IsLayerOfType<LayerType>,
                             &IsLayerOfType<ActivationLayer>,
                             &IsLayerOfType<OutputLayer>,
                             &IsLayerOfType<OutputLayer>));

    // Load network into runtime
    NetworkId networkIdentifierNotFused;
    BOOST_TEST(runNotFused->LoadNetwork(networkIdentifierNotFused, std::move(optNetNotFused)) == Status::Success);

    // Create structures for inputs and outputs
    std::vector<T> inputDataNotFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
    std::vector<T> outputDataNotFused(LayerTest::outputSize);
    std::vector<T> outputData2NotFused(LayerTest::outputSize);

    InputTensors inputTensorsNotFused{
        {0, ConstTensor(runNotFused->GetInputTensorInfo(networkIdentifierNotFused, 0), inputDataNotFused.data())}};
    OutputTensors outputTensorsNotFused{
        {0, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 0), outputDataNotFused.data())},
        {1, Tensor(runNotFused->GetOutputTensorInfo(networkIdentifierNotFused, 1), outputData2NotFused.data())}};

    // Execute network
    BOOST_TEST(runNotFused->EnqueueWorkload(networkIdentifierNotFused, inputTensorsNotFused, outputTensorsNotFused)
               == Status::Success);

    // Check that the output of the fused activation matches the output of the activation in the "NotFused" network
    for (unsigned int n = 0; n < outputDataFused.size(); ++n)
    {
        BOOST_CHECK_CLOSE(static_cast<float>(outputDataFused[n]), static_cast<float>(outputDataNotFused[n]),
                          T(tolerance));
    }
}

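// Lightweight variant: builds and runs only the fusible network and reports
// success/failure, without comparing results against an unfused reference.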
template<typename LayerTest,
         DataType ArmnnType,
         typename LayerType = typename LayerTest::LayerType,
         typename T = ResolveType<ArmnnType>>
bool FuseActivationSimpleTest(ActivationDescriptor activationDescriptor, Compute backendId,
                              float scale = 1.f, int32_t offset = 0)
{
    bool success;
    try
    {
        // Construct ArmNN network
        INetworkPtr networkFused = CreateNetwork<LayerTest, ArmnnType>(activationDescriptor, false, scale, offset);

        // Create ArmNN runtime
        IRuntimePtr run = IRuntime::Create(IRuntime::CreationOptions()); // default options

        // Optimise ArmNN network
        IOptimizedNetworkPtr optNetFused = Optimize(*networkFused, {backendId}, run->GetDeviceSpec());

        // Load network into runtime
        NetworkId networkIdentifier;
        BOOST_TEST(run->LoadNetwork(networkIdentifier, std::move(optNetFused)) == Status::Success);

        // Create structures for inputs and outputs
        std::vector<float> data = GetVector<float>(LayerTest::inputSize, 1.0f, 0.1f);
        std::vector<T> inputDataFused = armnnUtils::QuantizedVector<T>(data, scale, offset);
        std::vector<T> outputDataFused(LayerTest::outputSize);

        InputTensors inputTensorsFused{
            {0, ConstTensor(run->GetInputTensorInfo(networkIdentifier, 0), inputDataFused.data())}};
        OutputTensors outputTensorsFused{
            {0, Tensor(run->GetOutputTensorInfo(networkIdentifier, 0), outputDataFused.data())}};

        // Execute network
        run->EnqueueWorkload(networkIdentifier, inputTensorsFused, outputTensorsFused);

        success = true;
    }
    catch (const std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        success = false;
    }

    return success;
}

} // namespace armnn

using namespace armnn;
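// The test cases below are grouped by backend: CpuAcc (NEON) first, then GpuAcc (OpenCL),
// each guarded by the corresponding ArmCompute build flag.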
#if defined(ARMCOMPUTENEON_ENABLED)
// ReLu fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// ReLU fused into Receiver Layers QAsymmU8
BOOST_AUTO_TEST_CASE(FuseReLUIntoConvQAsymmU8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvQAsymmU8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedQAsymmU8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// BoundedReLu fused into Receiver Layers QAsymmS8
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvQASymmS8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvQASymmS8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedQASymmS8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// TanH fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// HardSwish fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::CpuAcc);
}

// Test that all receiver layers followed by all activation functions work, either fused or not fused
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16CpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "DepthwiseConvolution + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " << i);
        BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
            (activationDescriptor, Compute::CpuAcc)), "BatchNorm + Activation function " << i);
    }
}
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8CpuAccTest)
{
    ActivationDescriptor activationDescriptor;

    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::TanH;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::ReLu;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::HardSwish;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::CpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
}
#endif

#if defined(ARMCOMPUTECL_ENABLED)
// ReLu fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// BoundedReLu fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoBatchNormFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoMulFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoAddFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoSubFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDivFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;

    FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// ReLu fused into Receiver Layers Float16
BOOST_AUTO_TEST_CASE(FuseReLUIntoConvFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDWConvFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoFullyConnectedFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoBatchNormFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<BatchNormTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoMulFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoAddFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoSubFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUIntoDivFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float16>, DataType::Float16>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// ReLU fused into Receiver Layers QAsymmU8
BOOST_AUTO_TEST_CASE(FuseReLUQIntoConvAsymmU8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUQIntoDWConvAsymmU8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseReLUQIntoFullyConnectedAsymmU8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::ReLu;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// BoundedReLu fused into Receiver Layers QAsymmS8
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoConvQASymmS8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoDWConvQASymmS8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<DWConvolution2dTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseBoundedReLUIntoFullyConnectedQASymmS8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 6.0f;
    activationDescriptor.m_B = 0.0f;

    FuseActivationIntoPreviousLayerTest<FullyConnectedTest<DataType::QAsymmS8>, DataType::QAsymmS8>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// TanH fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseTanHIntoConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoMulFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoAddFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoSubFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseTanHIntoDivFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::TanH;

    FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// HardSwish fused into Receiver Layers Float32
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoConvFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoMulFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoAddFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<AdditionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoSubFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<SubtractionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}
BOOST_AUTO_TEST_CASE(FuseHardSwishIntoDivFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    activationDescriptor.m_Function = ActivationFunction::HardSwish;

    FuseActivationIntoPreviousLayerTest<DivisionTest<DataType::Float32>, DataType::Float32>
        (activationDescriptor, 0.0001f, Compute::GpuAcc);
}

// Test that all receiver layers followed by all activation functions work, either fused or not fused
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat32GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        if (activationDescriptor.m_Function != ActivationFunction::Elu)
        {
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "DepthwiseConvolution + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float32>, DataType::Float32>
                (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
        }
    }
}
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationFloat16GpuAccTest)
{
    ActivationDescriptor activationDescriptor;
    for (int i = 0; i != 12; ++i)
    {
        activationDescriptor.m_Function = static_cast<ActivationFunction>(i);
        activationDescriptor.m_A = 1.0f;
        activationDescriptor.m_B = -1.0f;
        if (activationDescriptor.m_Function != ActivationFunction::Elu)
        {
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DWConvolution2dTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Depthwise + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<BatchNormTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "BatchNorm + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<MultiplicationTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Multiplication + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<AdditionTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Addition + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<SubtractionTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Subtraction + Activation function " << i);
            BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<DivisionTest<DataType::Float16>, DataType::Float16>
                (activationDescriptor, Compute::GpuAcc)), "Division + Activation function " << i);
        }
    }
}
BOOST_AUTO_TEST_CASE(LayerFollowedByActivationQAsymmU8GpuAccTest)
{
    ActivationDescriptor activationDescriptor;

    activationDescriptor.m_Function = ActivationFunction::Sigmoid;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc, 1.f / 256.f, 0)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::TanH;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc, 1.f / 128.f, 128)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::ReLu;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::BoundedReLu;
    activationDescriptor.m_A = 1.0f;
    activationDescriptor.m_B = -1.0f;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));

    activationDescriptor.m_Function = ActivationFunction::HardSwish;
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<Convolution2dTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "Convolution + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
    BOOST_CHECK_MESSAGE((FuseActivationSimpleTest<FullyConnectedTest<DataType::QAsymmU8>, DataType::QAsymmU8>
        (activationDescriptor, Compute::GpuAcc)), "FullyConnected + Activation function " <<
        static_cast<int>(activationDescriptor.m_Function));
}
#endif

BOOST_AUTO_TEST_SUITE_END()