ArmNN 21.05 - EndToEndTestImpl.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "CommonTestUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>

#include <Profiling.hpp>
#include <QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <boost/test/unit_test.hpp>

#include <vector>

namespace
{

using namespace armnn;

template<typename T>
bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
                       const TensorInfo& commonTensorInfo,
                       const std::vector<T>& inputData,
                       const std::vector<T>& constantData,
                       const std::vector<T>& expectedOutputData)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output.
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    return outputData == expectedOutputData;
}

inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
{
    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
                             std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
                             std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
                             );
}

inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
{
    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);

    const float scale = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
                             armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
                             armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)  // Expected output.
                             );
}
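
// Illustrative sketch (an addition, not part of the original file): how a
// backend test suite might invoke the two wrappers above. The CpuRef backend
// is an assumption chosen only to keep the example self-contained.
inline void ExampleConstantUsageOnCpuRef()
{
    std::vector<BackendId> backends = { Compute::CpuRef };
    BOOST_TEST(ConstantUsageFloat32Test(backends));
    BOOST_TEST(ConstantUsageUint8Test(backends));
}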

// Utility function to find the number of instances of a substring within a string.
int SubStringCounter(std::string& string, std::string&& substring)
{
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found the substring
    while ((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by substring length to avoid finding the same substring twice
        found += substring.length();
    }
    return count;
}
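
// Usage sketch for SubStringCounter (an addition, not part of the original
// file): the import/export tests below count workload names in a profiler
// dump in exactly this way.
inline void ExampleSubStringCounterUsage()
{
    std::string dump = "CopyMemGeneric_Execute ... CopyMemGeneric_Execute";
    BOOST_TEST(SubStringCounter(dump, "CopyMemGeneric") == 2);
}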

template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        inputTensors.push_back({it.first,
                                ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            BOOST_CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
                                "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}
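
// Illustrative sketch (an addition, not part of the original file): a typical
// caller builds a single-layer network and delegates execution plus result
// comparison to EndToEndLayerTestImpl. The Abs layer, shapes and data below
// are assumptions chosen only for the example.
inline void ExampleAbsEndToEnd(const std::vector<BackendId>& backends)
{
    INetworkPtr net(INetwork::Create());
    TensorInfo info({ 1, 4 }, DataType::Float32);

    IConnectableLayer* input  = net->AddInputLayer(0);
    ElementwiseUnaryDescriptor descriptor(UnaryOperation::Abs);
    IConnectableLayer* abs    = net->AddElementwiseUnaryLayer(descriptor);
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(abs->GetInputSlot(0));
    abs->GetOutputSlot(0).Connect(output->GetInputSlot(0));
    input->GetOutputSlot(0).SetTensorInfo(info);
    abs->GetOutputSlot(0).SetTensorInfo(info);

    // Binding id 0 on both sides matches the Add*Layer(0) calls above.
    std::map<int, std::vector<float>> inputData      = {{ 0, { -1.f, 2.f, -3.f, 4.f } }};
    std::map<int, std::vector<float>> expectedOutput = {{ 0, {  1.f, 2.f,  3.f, 4.f } }};

    EndToEndLayerTestImpl<DataType::Float32, DataType::Float32>(
        std::move(net), inputData, expectedOutput, backends);
}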

inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(true, false);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Misaligned input
    float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);

    std::vector<float> outputData(4);

    // Aligned output
    float* alignedOutputData = outputData.data();

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference and expect it to fail with a MemoryImportException
    BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}
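
// Helper sketch (an addition, not part of the original file): the
// misaligned-pointer tests above and below offset a valid buffer by one byte.
// A check of this shape, assuming <cstdint> is available, mirrors the
// alignment validation that makes the import/export paths throw.
inline bool IsBufferAligned(const void* ptr, std::uintptr_t alignment = alignof(float))
{
    return reinterpret_cast<std::uintptr_t>(ptr) % alignment == 0;
}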

inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
    };

    // Aligned input
    float* alignedInputData = inputData.data();

    std::vector<float> outputData(5);

    // Misaligned output
    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
    };

    // Do the inference and expect it to fail with a MemoryExportException
    if (backends[0] == Compute::CpuAcc)
    {
        // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
    }
    else
    {
        BOOST_CHECK_THROW(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
    }
}
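
// Note (an addition, not part of the original file): INetworkProperties in
// these tests uses the (importEnabled, exportEnabled) boolean constructor,
// so each test's flag pair encodes which zero-copy path it exercises.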

inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    BOOST_CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    BOOST_TEST(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found == std::string::npos);

    // Check output is as expected
    BOOST_TEST(outputData == expectedOutput);
}
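
// Note (an addition, not part of the original file): the profiler-dump checks
// in these tests hinge on two workload names. CopyMemGeneric means data was
// copied between user buffers and backend tensors, so zero-copy did not
// happen; SyncMemGeneric means imported/exported memory was only synchronised.
// The expected counts below encode which of import and export was enabled.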

inline void ImportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    INetworkProperties networkProperties(true, false);

    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there are no SyncMemGeneric workloads as we didn't export
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 0);

    // Should only be 1 CopyMemGeneric for the output as we imported
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    INetworkProperties networkProperties(false, true);
    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // Should be 1 CopyMemGeneric for the input as we did not import
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 1);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ImportAndExportWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    BOOST_TEST_CHECKPOINT("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    INetworkProperties networkProperties(true, true);

    BOOST_TEST(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
               == Status::Success);

    BOOST_TEST_CHECKPOINT("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    BOOST_TEST_CHECKPOINT("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    BOOST_TEST_CHECKPOINT("Get Profiler");

    runtime->GetProfiler(netId)->EnableProfiling(true);

    BOOST_TEST_CHECKPOINT("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    BOOST_TEST_CHECKPOINT("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    BOOST_TEST_CHECKPOINT("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    BOOST_TEST(count == 1);

    // Shouldn't be any CopyMemGeneric workloads
    BOOST_TEST_CHECKPOINT("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    BOOST_TEST(count == 0);

    // Check the output is correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end());
}

inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    INetworkProperties networkProperties(true, true);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // The result of the inference is not important, just the fact that there
    // should be CopyMemGeneric workloads: an output slot with several
    // connections cannot be exported, so the runtime must fall back to copying.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    BOOST_TEST(found != std::string::npos);
    // Does not contain SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    BOOST_TEST(found == std::string::npos);
    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    BOOST_TEST(found != std::string::npos);

    // Check that the outputs are correct
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData0.begin(), outputData0.end(),
                                  expectedOutput.begin(), expectedOutput.end());
    BOOST_CHECK_EQUAL_COLLECTIONS(outputData1.begin(), outputData1.end(),
                                  expectedOutput.begin(), expectedOutput.end());
}

inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
    // dim of the output to make it too small to hold the specified slice.
    StridedSliceDescriptor descriptor;
    descriptor.m_Begin          = {0, 0};
    descriptor.m_End            = {2, 3};
    descriptor.m_Stride         = {1, 1};
    descriptor.m_BeginMask      = 0;
    descriptor.m_EndMask        = 0;
    descriptor.m_ShrinkAxisMask = 1;
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
    stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32));
    stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));

    // Attempt to optimize the network and check that the correct exception is thrown
    BOOST_CHECK_THROW(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
}
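
// Illustrative sketch (an addition, not part of the original file): a
// per-backend test file would typically drive the helpers above like this.
// The CpuRef backend id is an assumption; real suites pass their own backend.
inline void ExampleRunImportExportSuite()
{
    std::vector<BackendId> backends = { Compute::CpuRef };
    ImportAlignedPointerTest(backends);
    ImportAndExportWorkload(backends);
    ExportOutputWithSeveralOutputSlotConnectionsTest(backends);
}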

} // anonymous namespace