ArmNN 21.08
EndToEndTestImpl.hpp
//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "CommonTestUtils.hpp"

#include <armnn/Descriptors.hpp>
#include <armnn/INetwork.hpp>
#include <armnn/IRuntime.hpp>

#include <Profiling.hpp>
#include <QuantizeHelper.hpp>
#include <ResolveType.hpp>

#include <doctest/doctest.h>

#include <map>
#include <sstream>
#include <string>
#include <vector>

namespace
{

using namespace armnn;

template<typename T>
bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
                       const TensorInfo& commonTensorInfo,
                       const std::vector<T>& inputData,
                       const std::vector<T>& constantData,
                       const std::vector<T>& expectedOutputData)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);
    IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
    IConnectableLayer* add = net->AddAdditionLayer();
    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    // Sets the tensors in the network.
    input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
    add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    // Creates structures for input & output.
    std::vector<T> outputData(inputData.size());

    InputTensors inputTensors
    {
        {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
    };
    OutputTensors outputTensors
    {
        {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    return outputData == expectedOutputData;
}

inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
{
    const TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
                             std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
                             std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }  // Expected output.
    );
}

inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
{
    TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);

    const float scale   = 0.023529f;
    const int8_t offset = -43;

    commonTensorInfo.SetQuantizationScale(scale);
    commonTensorInfo.SetQuantizationOffset(offset);

    return ConstantUsageTest(backends,
                             commonTensorInfo,
                             armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
                             armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
                             armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset)  // Expected output.
    );
}
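
// Illustrative usage sketch (not part of the original header; the test name and backend choice
// below are hypothetical): a backend's end-to-end test file would typically wrap the helpers
// above in doctest cases, along these lines.
//
//     TEST_CASE("RefConstantUsage_Float32")
//     {
//         std::vector<BackendId> backends = { Compute::CpuRef };
//         CHECK(ConstantUsageFloat32Test(backends));
//     }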

// Utility function to find the number of instances of a substring within a string.
int SubStringCounter(std::string& string, std::string&& substring)
{
    std::size_t found = 0;
    int count = 0;
    // Look for the substring starting from where we last found the substring
    while ((found = string.find(substring, found)) != std::string::npos)
    {
        count++;
        // Offset by substring length to avoid finding the same substring twice
        found += substring.length();
    }
    return count;
}
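
// Quick illustration (hypothetical values, not from the original file): the helper is used
// below to count workload names in a profiler dump.
//
//     std::string dump = "CopyMemGeneric ... CopyMemGeneric";
//     int copies = SubStringCounter(dump, "CopyMemGeneric");   // copies == 2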

template<DataType ArmnnIType, DataType ArmnnOType,
         typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
void EndToEndLayerTestImpl(INetworkPtr network,
                           const std::map<int, std::vector<TInput>>& inputTensorData,
                           const std::map<int, std::vector<TOutput>>& expectedOutputData,
                           std::vector<BackendId> backends,
                           float tolerance = 0.000001f)
{
    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    runtime->LoadNetwork(netId, std::move(optNet));

    InputTensors inputTensors;
    inputTensors.reserve(inputTensorData.size());
    for (auto&& it : inputTensorData)
    {
        inputTensors.push_back({it.first,
                                ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
    }
    OutputTensors outputTensors;
    outputTensors.reserve(expectedOutputData.size());
    std::map<int, std::vector<TOutput>> outputStorage;
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out(it.second.size());
        outputStorage.emplace(it.first, out);
        outputTensors.push_back({it.first,
                                 Tensor(runtime->GetOutputTensorInfo(netId, it.first),
                                        outputStorage.at(it.first).data())});
    }

    // Does the inference.
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Checks the results.
    for (auto&& it : expectedOutputData)
    {
        std::vector<TOutput> out = outputStorage.at(it.first);
        for (unsigned int i = 0; i < out.size(); ++i)
        {
            CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
                          "Actual output: " << out[i] << ". Expected output: " << it.second[i]);
        }
    }
}
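
// Minimal usage sketch (assumed, not part of the original header; the "AbsEndToEnd" helper and
// its shapes/values are hypothetical): a layer-specific end-to-end test builds an INetwork and
// then delegates the run-and-compare step to EndToEndLayerTestImpl.
//
//     template<DataType ArmnnType, typename T = ResolveType<ArmnnType>>
//     void AbsEndToEnd(const std::vector<BackendId>& backends)
//     {
//         INetworkPtr net(INetwork::Create());
//         IConnectableLayer* input  = net->AddInputLayer(0);
//         ElementwiseUnaryDescriptor desc(UnaryOperation::Abs);
//         IConnectableLayer* abs    = net->AddElementwiseUnaryLayer(desc);
//         IConnectableLayer* output = net->AddOutputLayer(0);
//         input->GetOutputSlot(0).Connect(abs->GetInputSlot(0));
//         abs->GetOutputSlot(0).Connect(output->GetInputSlot(0));
//         TensorInfo info({ 1, 4 }, ArmnnType);
//         input->GetOutputSlot(0).SetTensorInfo(info);
//         abs->GetOutputSlot(0).SetTensorInfo(info);
//
//         std::map<int, std::vector<T>> inputData      = { { 0, { -1, 2, -3, 4 } } };
//         std::map<int, std::vector<T>> expectedOutput = { { 0, {  1, 2,  3, 4 } } };
//         EndToEndLayerTestImpl<ArmnnType, ArmnnType>(std::move(net), inputData, expectedOutput, backends);
//     }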

inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing (MemorySource flags assumed: import the input via Malloc)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    // Misaligned input
    float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);

    std::vector<float> outputData(4);

    // Aligned output
    float* alignedOutputData = outputData.data();

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference and expect it to fail with a MemoryImportException
    CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
}

inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing and Exporting (MemorySource flags assumed)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f, 5.0f
    };

    // Aligned input
    float* alignedInputData = inputData.data();

    std::vector<float> outputData(5);

    // Misaligned output
    float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
    };

    // Do the inference and expect it to fail with a MemoryExportException
    if (backends[0] == Compute::CpuAcc)
    {
        // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
        CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
    }
    else
    {
        CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
    }
}

inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
    CHECK(optNet);

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing
    // (MemorySource flags assumed; the output is imported too, so no CopyMemGeneric is expected below)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Contains ActivationWorkload
    std::size_t found = dump.find("ActivationWorkload");
    CHECK(found != std::string::npos);

    // Contains SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found != std::string::npos);

    // Does not contain CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found == std::string::npos);

    // Check output is as expected
    CHECK(outputData == expectedOutput);
}

inline void ImportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    INFO("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    // Import the input only (MemorySource flags assumed)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);

    CHECK(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
          == Status::Success);

    INFO("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    INFO("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    INFO("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    INFO("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    INFO("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there are no SyncMemGeneric workloads as we didn't export
    INFO("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    CHECK(count == 0);

    // Should only be 1 CopyMemGeneric for the output as we imported
    INFO("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    CHECK(count == 1);

    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
}

inline void ExportOnlyWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    INFO("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Export the output only (MemorySource flags assumed)
    INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
    CHECK(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
          == Status::Success);

    INFO("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    INFO("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    INFO("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    INFO("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    INFO("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    INFO("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    CHECK(count == 1);

    // Should be 1 CopyMemGeneric for the input as we did not import
    INFO("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    CHECK(count == 1);

    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
}

inline void ImportAndExportWorkload(std::vector<BackendId> backends)
{
    using namespace armnn;

    IRuntime::CreationOptions options;
    IRuntimePtr runtime(IRuntime::Create(options));

    // Builds up the structure of the network.
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* pooling = net->AddActivationLayer(descriptor);

    IConnectableLayer* output = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
    pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
    pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));

    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    INFO("Load Network");
    // Load it into the runtime. It should pass.
    NetworkId netId;
    std::string ignoredErrorMessage;

    // Import the input and export the output (MemorySource flags assumed)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);

    CHECK(runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties)
          == Status::Success);

    INFO("Generate Data");
    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    INFO("Create Network");
    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
    };

    INFO("Get Profiler");
    runtime->GetProfiler(netId)->EnableProfiling(true);

    INFO("Run Inference");
    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    INFO("Print Profiler");
    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    // Check there is a SyncMemGeneric workload as we exported
    INFO("Find SyncMemGeneric");
    int count = SubStringCounter(dump, "SyncMemGeneric");
    CHECK(count == 1);

    // Shouldn't be any CopyMemGeneric workloads
    INFO("Find CopyMemGeneric");
    count = SubStringCounter(dump, "CopyMemGeneric");
    CHECK(count == 0);

    // Check the output is correct
    CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
}

inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    ActivationDescriptor descriptor;
    descriptor.m_Function = ActivationFunction::Square;
    IConnectableLayer* activation = net->AddActivationLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);
    IConnectableLayer* output1 = net->AddOutputLayer(1);

    input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
    activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));

    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
    activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));

    // Optimize the network
    IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());

    // Loads it into the runtime.
    NetworkId netId;
    std::string ignoredErrorMessage;
    // Enable Importing (MemorySource flags assumed)
    INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
    runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);

    // Creates structures for input & output
    std::vector<float> inputData
    {
        1.0f, 2.0f, 3.0f, 4.0f
    };

    std::vector<float> outputData0(4);
    std::vector<float> outputData1(4);

    std::vector<float> expectedOutput
    {
        1.0f, 4.0f, 9.0f, 16.0f
    };

    InputTensors inputTensors
    {
        {0, armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
    };
    OutputTensors outputTensors
    {
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
        {1, armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
    };

    // The result of the inference is not important; what matters is that the output slot with
    // several connections cannot be exported, so a CopyMemGeneric (and no SyncMemGeneric)
    // workload is expected in the profiler dump below.
    runtime->GetProfiler(netId)->EnableProfiling(true);

    // Do the inference
    runtime->EnqueueWorkload(netId, inputTensors, outputTensors);

    // Retrieve the Profiler.Print() output to get the workload execution
    ProfilerManager& profilerManager = armnn::ProfilerManager::GetInstance();
    std::stringstream ss;
    profilerManager.GetProfiler()->Print(ss);
    std::string dump = ss.str();

    std::size_t found = std::string::npos;

    if (backends[0] == Compute::CpuRef)
    {
        found = dump.find("RefActivationWorkload");
    }
    else if (backends[0] == Compute::CpuAcc)
    {
        found = dump.find("NeonActivationWorkload");
    }
    else if (backends[0] == Compute::GpuAcc)
    {
        found = dump.find("ClActivationWorkload");
    }

    CHECK(found != std::string::npos);
    // Does not contain SyncMemGeneric
    found = dump.find("SyncMemGeneric");
    CHECK(found == std::string::npos);
    // Contains CopyMemGeneric
    found = dump.find("CopyMemGeneric");
    CHECK(found != std::string::npos);

    // Check that the outputs are correct
    CHECK(std::equal(outputData0.begin(), outputData0.end(),
                     expectedOutput.begin(), expectedOutput.end()));
    CHECK(std::equal(outputData1.begin(), outputData1.end(),
                     expectedOutput.begin(), expectedOutput.end()));
}

inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
{
    using namespace armnn;

    // Create runtime in which test will run
    IRuntime::CreationOptions options;
    IRuntimePtr runtime(armnn::IRuntime::Create(options));

    // Build up the structure of the network
    INetworkPtr net(INetwork::Create());

    IConnectableLayer* input = net->AddInputLayer(0);

    // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
    // dim of the output to make it too small to hold the specified slice.
    StridedSliceDescriptor descriptor;
    descriptor.m_Begin          = {0, 0};
    descriptor.m_End            = {2, 3};
    descriptor.m_Stride         = {1, 1};
    descriptor.m_BeginMask      = 0;
    descriptor.m_EndMask        = 0;
    descriptor.m_ShrinkAxisMask = 1;
    IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);

    IConnectableLayer* output0 = net->AddOutputLayer(0);

    input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
    stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));

    // Input shape assumed to match the descriptor's end values above
    input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32));
    stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));

    // Attempt to optimize the network and check that the correct exception is thrown
    CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
}
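
// Usage sketch (assumed, not from this file; the test name below is hypothetical): the
// import/export helpers above are typically invoked from a backend's end-to-end test file,
// e.g. for the Neon backend:
//
//     TEST_CASE("NeonImportNonAlignedInputPointerTest")
//     {
//         ImportNonAlignedInputPointerTest({ Compute::CpuAcc });
//     }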

} // anonymous namespace