ArmNN 20.02 - EndToEndTestImpl.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>

#include <Profiling.hpp>
#include <QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <boost/test/unit_test.hpp>

#include <cmath>
#include <map>
#include <sstream>
#include <string>
#include <vector>

namespace
{

using namespace armnn;
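
// Builds a minimal Input + Constant -> Addition -> Output network on the given
// backends, runs a single inference and returns true only if the output matches
// expectedOutputData exactly. All four tensors share commonTensorInfo.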
template<typename T>
bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
                       const TensorInfo& commonTensorInfo,
                       const std::vector<T>& inputData,
                       const std::vector<T>& constantData,
                       const std::vector<T>& expectedOutputData)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output.
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    return outputData == expectedOutputData;
}

inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
{
    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
                             std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
                             std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
    );
}

inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
{
    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);

    const float scale = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);
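
    // NOTE: armnn quantizes as round(value / scale) + offset, so this scale and
    // offset map the real interval [1.0, 7.0] onto the full uint8 range:
    // 1.0f -> 0 and 7.0f -> 255.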

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
                             armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
                             armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)  // Expected output.
    );
}

// Utility template for comparing tensor elements
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
bool Compare(T a, T b)
{
    if (ArmnnType == DataType::Boolean)
    {
        // NOTE: Boolean is represented as uint8_t (with zero equal to
        // false and everything else equal to true), therefore values
        // need to be cast to bool before comparing them
        return static_cast<bool>(a) == static_cast<bool>(b);
    }

    // NOTE: All other types can be cast to float and compared with
    // a certain level of tolerance
    constexpr float tolerance = 0.000001f;
    return std::fabs(static_cast<float>(a) - static_cast<float>(b)) <= tolerance;
}
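
// For example, Compare<DataType::Float32>(1.0f, 1.0000005f) passes because the
// difference is within the tolerance, and Compare<DataType::Boolean>(1, 255)
// passes because both operands are non-zero and therefore both true.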

// Utility function to find the number of instances of a substring within a string.
int SubStringCounter(std::string& string, std::string&& substring)
{
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found the substring
    while ((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by substring length to avoid finding the same substring twice
        found += substring.length();
    }
    return count;
}
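
// For example, counting "aa" in "aaaa" yields 2 rather than 3: the search resumes
// past the end of each match, so overlapping occurrences are not double-counted.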
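
// Optimizes and runs the given network on the supplied backends: each entry of
// inputTensorData feeds the input layer with the matching binding id, and every
// output element is checked against expectedOutputData with Compare<ArmnnOType>.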
template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        inputTensors.push_back({it.first,
                                ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            BOOST_CHECK(Compare<ArmnnOType>(it.second[i], out[i]) == true);
        }
    }
}
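
// The tests below exercise zero-copy input and output. INetworkProperties takes
// (importEnabled, exportEnabled): importing lets workloads read directly from
// caller-owned input buffers and exporting lets them write results directly to
// caller-owned output buffers; both paths need suitably aligned pointers, which
// is why misaligned buffers are expected to make EnqueueWorkload throw.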
inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(true, false);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Misaligned input
    float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);

    std::vector<float> outputData(4);

    // Aligned output
    float* alignedOutputData = outputData.data();

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference and expect it to fail with a MemoryImportException
    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}

inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
    };

    // Aligned input
    float* alignedInputData = inputData.data();

    std::vector<float> outputData(5);

    // Misaligned output
    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
    };

    // Do the inference and expect it to fail with a MemoryExportException
    if (backends[0] == Compute::CpuAcc)
    {
        // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
    }
    else
    {
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
    }
}
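
// The following tests use the profiler dump to verify which memory workloads ran:
// CopyMemGeneric means data was copied between caller memory and ArmNN-owned
// memory, while SyncMemGeneric means an imported/exported buffer was merely
// synchronized, i.e. the zero-copy path was taken.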

inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}

inline void ImportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, false);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there are no SyncMemGeneric workloads as we didn't export
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 0);

    // Should only be 1 CopyMemGeneric for the output as we imported
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // Should be 1 CopyMemGeneric for the output as we did not import
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ImportAndExportWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // Shouldn't be any CopyMemGeneric workloads
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 0);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // The result of the inference is not important, just the fact that because the
    // output slot feeds two output layers it cannot be exported: there should be
    // CopyMemGeneric workloads and no SyncMemGeneric ones.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    BOOST_TEST(found != std::string::npos);
    // Does not contain SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found == std::string::npos);
    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);
}

769 } // anonymous namespace
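
// Typical usage from a backend's test suite might look like the following
// (hypothetical Boost.Test case, shown only as a sketch):
//
//     BOOST_AUTO_TEST_CASE(ConstantUsage_Float32)
//     {
//         std::vector<armnn::BackendId> backends = { armnn::Compute::CpuRef };
//         BOOST_TEST(ConstantUsageFloat32Test(backends));
//     }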