ArmNN 21.11 — EndToEndTestImpl.hpp (source listing from the ArmNN reference documentation).
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include "CommonTestUtils.hpp"
8 
9 #include <armnn/Descriptors.hpp>
10 #include <armnn/INetwork.hpp>
11 #include <armnn/IRuntime.hpp>
12 
13 #include <Profiling.hpp>
14 #include <QuantizeHelper.hpp>
15 #include <ResolveType.hpp>
16 
17 #include <doctest/doctest.h>
18 
19 #include <vector>
20 
21 namespace
22 {
23 
24 using namespace armnn;
25 
26 template<typename T>
27 bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28  const TensorInfo& commonTensorInfo,
29  const std::vector<T>& inputData,
30  const std::vector<T>& constantData,
31  const std::vector<T>& expectedOutputData)
32 {
33  // Create runtime in which test will run
35  IRuntimePtr runtime(IRuntime::Create(options));
36 
37  // Builds up the structure of the network.
39 
40  IConnectableLayer* input = net->AddInputLayer(0);
41  IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42  IConnectableLayer* add = net->AddAdditionLayer();
43  IConnectableLayer* output = net->AddOutputLayer(0);
44 
45  input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46  constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47  add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48 
49  // Sets the tensors in the network.
50  input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51  constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52  add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53 
54  // optimize the network
55  IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56 
57  // Loads it into the runtime.
58  NetworkId netId;
59  runtime->LoadNetwork(netId, std::move(optNet));
60 
61  // Creates structures for input & output.
62  std::vector<T> outputData(inputData.size());
63 
64  InputTensors inputTensors
65  {
66  {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67  };
68  OutputTensors outputTensors
69  {
70  {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71  };
72 
73  // Does the inference.
74  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75 
76  // Checks the results.
77  return outputData == expectedOutputData;
78 }
79 
80 inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81 {
82  TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83  commonTensorInfo.SetConstant(true);
84 
85  return ConstantUsageTest(backends,
86  commonTensorInfo,
87  std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88  std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89  std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90  );
91 }
92 
93 inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94 {
95  TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
96 
97  const float scale = 0.023529f;
98  const int8_t offset = -43;
99 
100  commonTensorInfo.SetQuantizationScale(scale);
101  commonTensorInfo.SetQuantizationOffset(offset);
102  commonTensorInfo.SetConstant(true);
103 
104  return ConstantUsageTest(backends,
105  commonTensorInfo,
106  armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107  armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108  armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
109  );
110 }
111 
// Counts the non-overlapping occurrences of substring within string.
// An empty substring is defined to occur zero times (otherwise the scan would
// never advance, since find("") matches at every position).
int SubStringCounter(const std::string& string, const std::string& substring)
{
    if (substring.empty())
    {
        return 0;
    }
    int count = 0;
    // Resume each search just past the previous match so matches never overlap.
    for (std::size_t found = string.find(substring);
         found != std::string::npos;
         found = string.find(substring, found + substring.length()))
    {
        count++;
    }
    return count;
}
126 
127 template<DataType ArmnnIType, DataType ArmnnOType,
128  typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
129 void EndToEndLayerTestImpl(INetworkPtr network,
130  const std::map<int, std::vector<TInput>>& inputTensorData,
131  const std::map<int, std::vector<TOutput>>& expectedOutputData,
132  std::vector<BackendId> backends,
133  float tolerance = 0.000001f)
134 {
135  // Create runtime in which test will run
137  IRuntimePtr runtime(IRuntime::Create(options));
138 
139  // optimize the network
140  IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141 
142  // Loads it into the runtime.
143  NetworkId netId;
144  runtime->LoadNetwork(netId, std::move(optNet));
145 
146  InputTensors inputTensors;
147  inputTensors.reserve(inputTensorData.size());
148  for (auto&& it : inputTensorData)
149  {
150  inputTensors.push_back({it.first,
151  ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152  }
153  OutputTensors outputTensors;
154  outputTensors.reserve(expectedOutputData.size());
155  std::map<int, std::vector<TOutput>> outputStorage;
156  for (auto&& it : expectedOutputData)
157  {
158  std::vector<TOutput> out(it.second.size());
159  outputStorage.emplace(it.first, out);
160  outputTensors.push_back({it.first,
161  Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162  outputStorage.at(it.first).data())});
163  }
164 
165  // Does the inference.
166  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167 
168  // Checks the results.
169  for (auto&& it : expectedOutputData)
170  {
171  std::vector<TOutput> out = outputStorage.at(it.first);
172  for (unsigned int i = 0; i < out.size(); ++i)
173  {
174  CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
175  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176 
177  }
178  }
179 }
180 
181 inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
182 {
183  using namespace armnn;
184 
185  // Create runtime in which test will run
187  IRuntimePtr runtime(armnn::IRuntime::Create(options));
188 
189  // build up the structure of the network
191 
192  IConnectableLayer* input = net->AddInputLayer(0);
193 
194  ActivationDescriptor descriptor;
196  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
197 
198  IConnectableLayer* output = net->AddOutputLayer(0);
199 
200  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
202 
203  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
204  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
205 
206  // Optimize the network
207  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
208  CHECK(optNet);
209 
210  // Loads it into the runtime.
211  NetworkId netId;
212  std::string ignoredErrorMessage;
213  // Enable Importing
215  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
216 
217  // Creates structures for input & output
218  std::vector<float> inputData
219  {
220  1.0f, 2.0f, 3.0f, 4.0f
221  };
222 
223  // Misaligned input
224  float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
225 
226  std::vector<float> outputData(4);
227 
228  // Aligned output
229  float* alignedOutputData = outputData.data();
230 
231  InputTensors inputTensors
232  {
233  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
234  };
235  OutputTensors outputTensors
236  {
237  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
238  };
239 
240  runtime->GetProfiler(netId)->EnableProfiling(true);
241 
242  // Do the inference and expect it to fail with a ImportMemoryException
243  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
244 }
245 
246 inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
247 {
248  using namespace armnn;
249 
250  // Create runtime in which test will run
252  IRuntimePtr runtime(armnn::IRuntime::Create(options));
253 
254  // build up the structure of the network
256 
257  IConnectableLayer* input = net->AddInputLayer(0);
258 
259  ActivationDescriptor descriptor;
261  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
262 
263  IConnectableLayer* output = net->AddOutputLayer(0);
264 
265  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
266  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
267 
268  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
269  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
270 
271  // Optimize the network
272  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
273  CHECK(optNet);
274 
275  // Loads it into the runtime.
276  NetworkId netId;
277  std::string ignoredErrorMessage;
278  // Enable Importing and Exporting
280  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
281 
282  // Creates structures for input & output
283  std::vector<float> inputData
284  {
285  1.0f, 2.0f, 3.0f, 4.0f, 5.0f
286  };
287 
288  // Aligned input
289  float* alignedInputData = inputData.data();
290 
291  std::vector<float> outputData(5);
292 
293  // Misaligned output
294  float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
295 
296  InputTensors inputTensors
297  {
298  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
299  };
300  OutputTensors outputTensors
301  {
302  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
303  };
304 
305  // Do the inference and expect it to fail with a ExportMemoryException
306  if (backends[0] == Compute::CpuAcc)
307  {
308  // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
309  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
310  }
311  else
312  {
313  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
314  }
315 }
316 
317 inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
318 {
319  using namespace armnn;
320 
321  // Create runtime in which test will run
323  IRuntimePtr runtime(armnn::IRuntime::Create(options));
324 
325  // build up the structure of the network
327 
328  IConnectableLayer* input = net->AddInputLayer(0);
329 
330  ActivationDescriptor descriptor;
332  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
333 
334  IConnectableLayer* output = net->AddOutputLayer(0);
335 
336  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
337  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
338 
339  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
340  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
341 
342  // Optimize the network
343  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
344  CHECK(optNet);
345 
346  // Loads it into the runtime.
347  NetworkId netId;
348  std::string ignoredErrorMessage;
349  // Enable Importing
351  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
352 
353  // Creates structures for input & output
354  std::vector<float> inputData
355  {
356  1.0f, 2.0f, 3.0f, 4.0f
357  };
358 
359  std::vector<float> outputData(4);
360 
361  std::vector<float> expectedOutput
362  {
363  1.0f, 4.0f, 9.0f, 16.0f
364  };
365 
366  InputTensors inputTensors
367  {
368  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
369  };
370  OutputTensors outputTensors
371  {
372  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
373  };
374 
375  runtime->GetProfiler(netId)->EnableProfiling(true);
376 
377  // Do the inference
378  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
379 
380  // Retrieve the Profiler.Print() output to get the workload execution
382  std::stringstream ss;
383  profilerManager.GetProfiler()->Print(ss);;
384  std::string dump = ss.str();
385 
386  // Contains ActivationWorkload
387  std::size_t found = dump.find("ActivationWorkload");
388  CHECK(found != std::string::npos);
389 
390  // Contains SyncMemGeneric
391  found = dump.find("SyncMemGeneric");
392  CHECK(found != std::string::npos);
393 
394  // Does not contain CopyMemGeneric
395  found = dump.find("CopyMemGeneric");
396  CHECK(found == std::string::npos);
397 
398  // Check output is as expected
399  CHECK(outputData == expectedOutput);
400 }
401 
402 inline void ImportOnlyWorkload(std::vector<BackendId> backends)
403 {
404  using namespace armnn;
405 
407  IRuntimePtr runtime(IRuntime::Create(options));
408 
409  // Builds up the structure of the network.
411 
412  IConnectableLayer* input = net->AddInputLayer(0);
413 
414  ActivationDescriptor descriptor;
416  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
417 
418  IConnectableLayer* output = net->AddOutputLayer(0);
419 
420  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
421  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
422 
423  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
424  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
425 
426  // optimize the network
427  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
428 
429  INFO("Load Network");
430  // Load it into the runtime. It should pass.
431  NetworkId netId;
432  std::string ignoredErrorMessage;
433 
435 
436  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
437  == Status::Success);
438 
439  INFO("Generate Data");
440  // Creates structures for input & output
441  std::vector<float> inputData
442  {
443  1.0f, 2.0f, 3.0f, 4.0f
444  };
445 
446  std::vector<float> outputData(4);
447 
448  std::vector<float> expectedOutput
449  {
450  1.0f, 4.0f, 9.0f, 16.0f
451  };
452 
453  INFO("Create Network");
454 
455  InputTensors inputTensors
456  {
457  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
458  };
459  OutputTensors outputTensors
460  {
461  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
462  };
463 
464  INFO("Get Profiler");
465  runtime->GetProfiler(netId)->EnableProfiling(true);
466 
467  INFO("Run Inference");
468  // Do the inference
469  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
470 
471  INFO("Print Profiler");
472  // Retrieve the Profiler.Print() output to get the workload execution
474  std::stringstream ss;
475  profilerManager.GetProfiler()->Print(ss);;
476  std::string dump = ss.str();
477 
478  // Check there are no SyncMemGeneric workloads as we didn't export
479  INFO("Find SyncMemGeneric");
480  int count = SubStringCounter(dump, "SyncMemGeneric");
481  CHECK(count == 0);
482 
483  // Should only be 1 CopyMemGeneric for the output as we imported
484  INFO("Find CopyMemGeneric");
485  count = SubStringCounter(dump, "CopyMemGeneric");
486  CHECK(count == 1);
487 
488  // Check the output is correct
489  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
490 }
491 
492 inline void ExportOnlyWorkload(std::vector<BackendId> backends)
493 {
494  using namespace armnn;
495 
497  IRuntimePtr runtime(IRuntime::Create(options));
498 
499  // Builds up the structure of the network.
501 
502  IConnectableLayer* input = net->AddInputLayer(0);
503 
504  ActivationDescriptor descriptor;
506  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
507 
508  IConnectableLayer* output = net->AddOutputLayer(0);
509 
510  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
511  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
512 
513  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
514  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
515 
516  // optimize the network
517  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
518 
519  INFO("Load Network");
520  // Load it into the runtime. It should pass.
521  NetworkId netId;
522  std::string ignoredErrorMessage;
524  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
525  == Status::Success);
526 
527  INFO("Generate Data");
528  // Creates structures for input & output
529  std::vector<float> inputData
530  {
531  1.0f, 2.0f, 3.0f, 4.0f
532  };
533 
534  std::vector<float> outputData(4);
535 
536  std::vector<float> expectedOutput
537  {
538  1.0f, 4.0f, 9.0f, 16.0f
539  };
540 
541  INFO("Create Network");
542 
543  InputTensors inputTensors
544  {
545  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
546  };
547  OutputTensors outputTensors
548  {
549  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
550  };
551 
552  INFO("Get Profiler");
553  runtime->GetProfiler(netId)->EnableProfiling(true);
554 
555  INFO("Run Inference");
556  // Do the inference
557  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
558 
559  INFO("Print Profiler");
560  // Retrieve the Profiler.Print() output to get the workload execution
562  std::stringstream ss;
563  profilerManager.GetProfiler()->Print(ss);;
564  std::string dump = ss.str();
565 
566  // Check there is a SyncMemGeneric workload as we exported
567  INFO("Find SyncMemGeneric");
568  int count = SubStringCounter(dump, "SyncMemGeneric");
569  CHECK(count == 1);
570 
571  // Should be 1 CopyMemGeneric for the output as we did not import
572  INFO("Find CopyMemGeneric");
573  count = SubStringCounter(dump, "CopyMemGeneric");
574  CHECK(count == 1);
575 
576  // Check the output is correct
577  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
578 }
579 
580 inline void ImportAndExportWorkload(std::vector<BackendId> backends)
581 {
582  using namespace armnn;
583 
585  IRuntimePtr runtime(IRuntime::Create(options));
586 
587  // Builds up the structure of the network.
589 
590  IConnectableLayer* input = net->AddInputLayer(0);
591 
592  ActivationDescriptor descriptor;
594  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
595 
596  IConnectableLayer* output = net->AddOutputLayer(0);
597 
598  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
599  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
600 
601  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
602  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
603 
604  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
605 
606  INFO("Load Network");
607  // Load it into the runtime. It should pass.
608  NetworkId netId;
609  std::string ignoredErrorMessage;
610 
612 
613  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
614  == Status::Success);
615 
616  INFO("Generate Data");
617  // Creates structures for input & output
618  std::vector<float> inputData
619  {
620  1.0f, 2.0f, 3.0f, 4.0f
621  };
622 
623  std::vector<float> outputData(4);
624 
625  std::vector<float> expectedOutput
626  {
627  1.0f, 4.0f, 9.0f, 16.0f
628  };
629 
630  INFO("Create Network");
631 
632  InputTensors inputTensors
633  {
634  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
635  };
636  OutputTensors outputTensors
637  {
638  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
639  };
640 
641  INFO("Get Profiler");
642  runtime->GetProfiler(netId)->EnableProfiling(true);
643 
644  INFO("Run Inference");
645  // Do the inference
646  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
647 
648  INFO("Print Profiler");
649  // Retrieve the Profiler.Print() output to get the workload execution
651  std::stringstream ss;
652  profilerManager.GetProfiler()->Print(ss);;
653  std::string dump = ss.str();
654 
655  // Check there is a SyncMemGeneric workload as we exported
656  INFO("Find SyncMemGeneric");
657  int count = SubStringCounter(dump, "SyncMemGeneric");
658  CHECK(count == 1);
659 
660  // Shouldn't be any CopyMemGeneric workloads
661  INFO("Find CopyMemGeneric");
662  count = SubStringCounter(dump, "CopyMemGeneric");
663  CHECK(count == 0);
664 
665  // Check the output is correct
666  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
667 }
668 
669 inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
670 {
671  using namespace armnn;
672 
673  // Create runtime in which test will run
675  IRuntimePtr runtime(armnn::IRuntime::Create(options));
676 
677  // build up the structure of the network
679 
680  IConnectableLayer* input = net->AddInputLayer(0);
681 
682  ActivationDescriptor descriptor;
684  IConnectableLayer* activation = net->AddActivationLayer(descriptor);
685 
686  IConnectableLayer* output0 = net->AddOutputLayer(0);
687  IConnectableLayer* output1 = net->AddOutputLayer(1);
688 
689  input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
690  activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
691  activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
692 
693  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
694  activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
695 
696  // Optimize the network
697  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
698 
699  // Loads it into the runtime.
700  NetworkId netId;
701  std::string ignoredErrorMessage;
702  // Enable Importing
704  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
705 
706  // Creates structures for input & output
707  std::vector<float> inputData
708  {
709  1.0f, 2.0f, 3.0f, 4.0f
710  };
711 
712  std::vector<float> outputData0(4);
713  std::vector<float> outputData1(4);
714 
715  std::vector<float> expectedOutput
716  {
717  1.0f, 4.0f, 9.0f, 16.0f
718  };
719 
720  InputTensors inputTensors
721  {
722  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
723  };
724  OutputTensors outputTensors
725  {
726  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
727  {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
728  };
729 
730  // The result of the inference is not important, just the fact that there
731  // should not be CopyMemGeneric workloads.
732  runtime->GetProfiler(netId)->EnableProfiling(true);
733 
734  // Do the inference
735  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
736 
737  // Retrieve the Profiler.Print() output to get the workload execution
739  std::stringstream ss;
740  profilerManager.GetProfiler()->Print(ss);
741  std::string dump = ss.str();
742 
743  std::size_t found = std::string::npos;
744 
745  if (backends[0] == Compute::CpuRef)
746  {
747  found = dump.find("RefActivationWorkload");
748  }
749  else if (backends[0] == Compute::CpuAcc)
750  {
751  found = dump.find("NeonActivationWorkload");
752  }
753  else if (backends[0] == Compute::GpuAcc)
754  {
755  found = dump.find("ClActivationWorkload");
756  }
757 
758  CHECK(found != std::string::npos);
759  // No contains SyncMemGeneric
760  found = dump.find("SyncMemGeneric");
761  CHECK(found == std::string::npos);
762  // Contains CopyMemGeneric
763  found = dump.find("CopyMemGeneric");
764  CHECK(found != std::string::npos);
765 
766  // Check that the outputs are correct
767  CHECK(std::equal(outputData0.begin(), outputData0.end(),
768  expectedOutput.begin(), expectedOutput.end()));
769  CHECK(std::equal(outputData1.begin(), outputData1.end(),
770  expectedOutput.begin(), expectedOutput.end()));
771 }
772 
773 inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
774 {
775  using namespace armnn;
776 
777  // Create runtime in which test will run
779  IRuntimePtr runtime(armnn::IRuntime::Create(options));
780 
781  // build up the structure of the network
783 
784  IConnectableLayer* input = net->AddInputLayer(0);
785 
786  // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
787  // dim of the output to make it too small to hold the specified slice.
788  StridedSliceDescriptor descriptor;
789  descriptor.m_Begin = {0, 0};
790  descriptor.m_End = {2, 3};
791  descriptor.m_Stride = {1, 1};
792  descriptor.m_BeginMask = 0;
793  descriptor.m_EndMask = 0;
794  descriptor.m_ShrinkAxisMask = 1;
795  IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
796 
797  IConnectableLayer* output0 = net->AddOutputLayer(0);
798 
799  input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
800  stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
801 
802  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
803  stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
804 
805  // Attempt to optimize the network and check that the correct exception is thrown
806  CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
807 }
808 
809 } // anonymous namespace
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:40
Interface for a layer that is connectable to other layers via InputSlots and OutputSlots.
Definition: INetwork.hpp:61
CPU Execution: Reference C++ kernels.
int32_t m_ShrinkAxisMask
Shrink axis mask value. If set, the nth specification shrinks the dimensionality by 1...
static ProfilerManager & GetInstance()
Definition: Profiling.cpp:568
std::vector< int > m_Begin
Begin values for the input that will be sliced.
std::unique_ptr< IRuntime, void(*)(IRuntime *runtime)> IRuntimePtr
Definition: IRuntime.hpp:31
void Print(std::ostream &outStream) const
Print stats for events in JSON Format to the given output stream.
Definition: Profiling.cpp:605
typename ResolveTypeImpl< DT >::Type ResolveType
Definition: ResolveType.hpp:79
std::vector< std::pair< LayerBindingId, class ConstTensor > > InputTensors
Definition: Tensor.hpp:392
Copyright (c) 2021 ARM Limited and Contributors.
int32_t m_BeginMask
Begin mask value.
int32_t m_EndMask
End mask value.
virtual void SetTensorInfo(const TensorInfo &tensorInfo)=0
IProfiler * GetProfiler()
Definition: Profiling.cpp:580
A tensor defined by a TensorInfo (shape and data type) and a mutable backing store.
Definition: Tensor.hpp:319
DataType
Definition: Types.hpp:35
IOptimizedNetworkPtr Optimize(const INetwork &network, const std::vector< BackendId > &backendPreferences, const IDeviceSpec &deviceSpec, const OptimizerOptions &options=OptimizerOptions(), Optional< std::vector< std::string > &> messages=EmptyOptional())
Create an optimized version of the network.
Definition: Network.cpp:1605
int NetworkId
Definition: IRuntime.hpp:25
A tensor defined by a TensorInfo (shape and data type) and an immutable backing store.
Definition: Tensor.hpp:327
std::vector< std::pair< LayerBindingId, class Tensor > > OutputTensors
Definition: Tensor.hpp:393
std::unique_ptr< IOptimizedNetwork, void(*)(IOptimizedNetwork *network)> IOptimizedNetworkPtr
Definition: INetwork.hpp:198
void SetQuantizationScale(float scale)
Definition: Tensor.cpp:475
GPU Execution: OpenCL: ArmCompute.
std::vector< int > m_Stride
Stride values for the input that will be sliced.
An ActivationDescriptor for the ActivationLayer.
Definition: Descriptors.hpp:25
std::vector< int > m_End
End values for the input that will be sliced.
CPU Execution: NEON: ArmCompute.
virtual const IInputSlot & GetInputSlot(unsigned int index) const =0
Get a const input slot handle by slot index.
void SetConstant(const bool IsConstant=true)
Marks the data corresponding to this tensor info as constant.
Definition: Tensor.cpp:516
A StridedSliceDescriptor for the StridedSliceLayer.
virtual const IOutputSlot & GetOutputSlot(unsigned int index) const =0
Get the const output slot handle by slot index.
void SetQuantizationOffset(int32_t offset)
Definition: Tensor.cpp:491
std::unique_ptr< INetwork, void(*)(INetwork *network)> INetworkPtr
Definition: INetwork.hpp:197
virtual int Connect(IInputSlot &destination)=0
static INetworkPtr Create(NetworkOptions networkOptions={})
Definition: Network.cpp:478
ActivationFunction m_Function
The activation function to use (Sigmoid, TanH, Linear, ReLu, BoundedReLu, SoftReLu, LeakyReLu, Abs, Sqrt, Square, Elu).
Definition: Descriptors.hpp:48