ArmNN 21.02
EndToEndTestImpl.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>

#include <Profiling.hpp>
#include <QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <boost/test/unit_test.hpp>

#include <cmath>
#include <cstdint>
#include <map>
#include <sstream>
#include <string>
#include <vector>

namespace
{

using namespace armnn;

template<typename T>
bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
                       const TensorInfo& commonTensorInfo,
                       const std::vector<T>& inputData,
                       const std::vector<T>& constantData,
                       const std::vector<T>& expectedOutputData)
{
    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Set the tensor info on the outgoing slots.
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Load it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Create structures for input and output.
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        { 0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) }
    };
    OutputTensors outputTensors
    {
        { 0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Check the results.
    return outputData == expectedOutputData;
}

inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
{
    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
                             std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
                             std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
    );
}

inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
{
    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);

    const float scale = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
                             armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
                             armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)  // Expected output.
    );
}
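
// A minimal sketch (illustrative, not from the original header) of the
// quantization round trip assumed by ConstantUsageUint8Test: real values map
// to round(real / scale) + offset, so each value is recoverable to within one
// quantization step. The helper function name below is hypothetical.
inline void QuantizationRoundTripSketch()
{
    const float scale = 0.023529f;
    const int32_t offset = -43;
    // Quantize 7.0f with the same parameters the test uses.
    const uint8_t q = armnnUtils::QuantizedVector<uint8_t>({ 7.f }, scale, offset)[0];
    // Dequantize and check the result is within one quantization step.
    const float dequantized = static_cast<float>(static_cast<int32_t>(q) - offset) * scale;
    BOOST_TEST(std::fabs(dequantized - 7.f) <= scale);
}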

// Utility template for comparing tensor elements.
template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
bool Compare(T a, T b, float tolerance = 0.000001f)
{
    if (ArmnnType == DataType::Boolean)
    {
        // NOTE: Boolean is represented as uint8_t (zero equals false and
        // everything else equals true), so values need to be cast to bool
        // before comparing them.
        return static_cast<bool>(a) == static_cast<bool>(b);
    }

    // NOTE: All other types can be cast to float and compared with
    // a certain level of tolerance.
    return std::fabs(static_cast<float>(a) - static_cast<float>(b)) <= tolerance;
}
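
// A minimal usage sketch (illustrative, not from the original header): for
// Boolean data any two non-zero values compare equal, while floating-point
// values compare within the given tolerance.
inline void CompareUsageSketch()
{
    BOOST_TEST(Compare<DataType::Boolean>(uint8_t{ 1 }, uint8_t{ 255 })); // Both non-zero, so both true.
    BOOST_TEST(Compare<DataType::Float32>(1.0f, 1.0000005f));             // Within the default tolerance.
}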

// Utility function to find the number of instances of a substring within a string.
int SubStringCounter(std::string& string, std::string&& substring)
{
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found it.
    while ((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by the substring length to avoid finding the same occurrence twice.
        found += substring.length();
    }
    return count;
}
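
// A minimal usage sketch (illustrative, not from the original header):
// occurrences are counted without overlap, so the string below yields 2.
inline void SubStringCounterUsageSketch()
{
    std::string dump = "CopyMemGeneric ... CopyMemGeneric";
    BOOST_TEST(SubStringCounter(dump, "CopyMemGeneric") == 2);
}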

template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f)
{
    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Load it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        inputTensors.push_back({it.first,
                                ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Check the results.
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            BOOST_CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance),
                                "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}
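
// A minimal usage sketch (illustrative, not from the original header): a
// typical caller builds a single-layer network, sets the tensor infos, and
// hands everything to EndToEndLayerTestImpl. The ReLu layer and the data
// below are assumptions chosen purely for illustration.
inline void EndToEndLayerTestUsageSketch(const std::vector<BackendId>& backends)
{
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::ReLu;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 4 }, DataType::Float32));

    // Keyed by layer binding id; ReLu clamps negatives to zero.
    const std::map<int, std::vector<float>> inputData    = {{ 0, { -1.f, 0.f, 1.f, 2.f } }};
    const std::map<int, std::vector<float>> expectedData = {{ 0, {  0.f, 0.f, 1.f, 2.f } }};

    EndToEndLayerTestImpl<DataType::Float32, DataType::Float32>(std::move(net),
                                                                inputData,
                                                                expectedData,
                                                                backends);
}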

inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing.
    INetworkProperties networkProperties(true, false);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Misaligned input.
    float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);

    std::vector<float> outputData(4);

    // Aligned output.
    float* alignedOutputData = outputData.data();

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference and expect it to fail with a MemoryImportException.
    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}

inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting.
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
    };

    // Aligned input.
    float* alignedInputData = inputData.data();

    std::vector<float> outputData(5);

    // Misaligned output.
    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData) }
    };

    // Do the inference and expect it to fail with a MemoryExportException.
    if (backends[0] == Compute::CpuAcc)
    {
        // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory.
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
    }
    else
    {
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
    }
}

inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Load it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting.
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution.
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload.
    std::size_t found = dump.find("ActivationWorkload");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric.
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric.
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Check the output is as expected.
    BOOST_TEST(outputData == expectedOutput);
}

inline void ImportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, false);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution.
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there are no SyncMemGeneric workloads as we didn't export.
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 0);

    // There should only be one CopyMemGeneric, for the output, as we imported the input.
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct.
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution.
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported.
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // There should be one CopyMemGeneric, for the input, as we did not import.
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct.
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ImportAndExportWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(true, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data()) }
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution.
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported.
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // There shouldn't be any CopyMemGeneric workloads.
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 0);

    // Check the output is correct.
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network.
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Load it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting.
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Create structures for input and output.
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        { 0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data()) },
    };
    OutputTensors outputTensors
    {
        { 0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data()) },
        { 1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data()) }
    };

    // The result of the inference is not important, just the fact that there
    // should be no SyncMemGeneric workloads: the output slot has several
    // connections, so the outputs cannot be exported and must be copied.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution.
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    BOOST_TEST(found != std::string::npos);
    // Does not contain SyncMemGeneric.
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found == std::string::npos);
    // Contains CopyMemGeneric.
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check that the outputs are correct.
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData0.begin(), outputData0.end(),
                                  expectedOutput.begin(), expectedOutput.end());
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData1.begin(), outputData1.end(),
                                  expectedOutput.begin(), expectedOutput.end());
}

inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create the runtime in which the test will run.
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
    // dim of the output to make it too small to hold the specified slice.
    StridedSliceDescriptor descriptor;
    descriptor.m_Begin          = {0, 0};
    descriptor.m_End            = {2, 3};
    descriptor.m_Stride         = {1, 1};
    descriptor.m_BeginMask      = 0;
    descriptor.m_EndMask        = 0;
    descriptor.m_ShrinkAxisMask = 1;
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
    stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32));
    stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));

    // Attempt to optimize the network and check that the correct exception is thrown.
    BOOST_CHECK_THROW(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
}

} // anonymous namespace