ArmNN 22.08
EndToEndTestImpl.hpp
1 //
2 // Copyright © 2017 Arm Ltd. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
7 #include <CommonTestUtils.hpp>
8 
9 #include <armnn/Descriptors.hpp>
10 #include <armnn/INetwork.hpp>
11 #include <armnn/IRuntime.hpp>
12 
13 #include <Profiling.hpp>
14 #include <QuantizeHelper.hpp>
15 #include <ResolveType.hpp>
16 
17 #include <doctest/doctest.h>
18 
19 #include <vector>
20 
21 namespace
22 {
23 
24 using namespace armnn;
25 
26 template<typename T>
27 bool ConstantUsageTest(const std::vector<BackendId>& computeDevice,
28  const TensorInfo& commonTensorInfo,
29  const std::vector<T>& inputData,
30  const std::vector<T>& constantData,
31  const std::vector<T>& expectedOutputData)
32 {
33  // Create runtime in which test will run
34  IRuntime::CreationOptions options;
35  IRuntimePtr runtime(IRuntime::Create(options));
36 
37  // Builds up the structure of the network.
38  INetworkPtr net(INetwork::Create());
39 
40  IConnectableLayer* input = net->AddInputLayer(0);
41  IConnectableLayer* constant = net->AddConstantLayer(ConstTensor(commonTensorInfo, constantData));
42  IConnectableLayer* add = net->AddAdditionLayer();
43  IConnectableLayer* output = net->AddOutputLayer(0);
44 
45  input->GetOutputSlot(0).Connect(add->GetInputSlot(0));
46  constant->GetOutputSlot(0).Connect(add->GetInputSlot(1));
47  add->GetOutputSlot(0).Connect(output->GetInputSlot(0));
48 
49  // Sets the tensors in the network.
50  input->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
51  constant->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
52  add->GetOutputSlot(0).SetTensorInfo(commonTensorInfo);
53 
54  // optimize the network
55  IOptimizedNetworkPtr optNet = Optimize(*net, computeDevice, runtime->GetDeviceSpec());
56 
57  // Loads it into the runtime.
58  NetworkId netId;
59  runtime->LoadNetwork(netId, std::move(optNet));
60 
61  // Creates structures for input & output.
62  std::vector<T> outputData(inputData.size());
63 
64  InputTensors inputTensors
65  {
66  {0, ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())}
67  };
68  OutputTensors outputTensors
69  {
70  {0, Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
71  };
72 
73  // Does the inference.
74  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
75 
76  // Checks the results.
77  return outputData == expectedOutputData;
78 }
79 
80 inline bool ConstantUsageFloat32Test(const std::vector<BackendId>& backends)
81 {
82  TensorInfo commonTensorInfo({ 2, 3 }, DataType::Float32);
83  commonTensorInfo.SetConstant(true);
84 
85  return ConstantUsageTest(backends,
86  commonTensorInfo,
87  std::vector<float>{ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, // Input.
88  std::vector<float>{ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, // Const input.
89  std::vector<float>{ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f } // Expected output.
90  );
91 }
92 
93 inline bool ConstantUsageUint8Test(const std::vector<BackendId>& backends)
94 {
95  TensorInfo commonTensorInfo({ 2, 3 }, DataType::QAsymmU8);
96 
97  const float scale = 0.023529f;
98  const int8_t offset = -43;
99 
100  commonTensorInfo.SetQuantizationScale(scale);
101  commonTensorInfo.SetQuantizationOffset(offset);
102  commonTensorInfo.SetConstant(true);
103 
104  return ConstantUsageTest(backends,
105  commonTensorInfo,
106  armnnUtils::QuantizedVector<uint8_t>({ 1.f, 2.f, 3.f, 4.f, 5.f, 6.f }, scale, offset), // Input.
107  armnnUtils::QuantizedVector<uint8_t>({ 6.f, 5.f, 4.f, 3.f, 2.f, 1.f }, scale, offset), // Const input.
108  armnnUtils::QuantizedVector<uint8_t>({ 7.f, 7.f, 7.f, 7.f, 7.f, 7.f }, scale, offset) // Expected output.
109  );
110 }
111 
112 // Utility function to find the number of instances of a substring within a string.
113 int SubStringCounter(std::string& string, std::string&& substring)
114 {
115  std::size_t found = 0;
116  int count = 0;
117  // Look for the substring starting from where we last found the substring
118  while((found = string.find(substring, found)) != std::string::npos)
119  {
120  count++;
121  // Offset by substring length to avoid finding the same substring twice
122  found += substring.length();
123  }
124  return count;
125 }
126 
127 template<DataType ArmnnIType, DataType ArmnnOType,
128  typename TInput = ResolveType<ArmnnIType>, typename TOutput = ResolveType<ArmnnOType>>
129 void EndToEndLayerTestImpl(INetworkPtr network,
130  const std::map<int, std::vector<TInput>>& inputTensorData,
131  const std::map<int, std::vector<TOutput>>& expectedOutputData,
132  std::vector<BackendId> backends,
133  float tolerance = 0.000001f)
134 {
135  // Create runtime in which test will run
136  IRuntime::CreationOptions options;
137  IRuntimePtr runtime(IRuntime::Create(options));
138 
139  // optimize the network
140  IOptimizedNetworkPtr optNet = Optimize(*network, backends, runtime->GetDeviceSpec());
141 
142  // Loads it into the runtime.
143  NetworkId netId;
144  runtime->LoadNetwork(netId, std::move(optNet));
145 
146  InputTensors inputTensors;
147  inputTensors.reserve(inputTensorData.size());
148  for (auto&& it : inputTensorData)
149  {
150  inputTensors.push_back({it.first,
151  ConstTensor(runtime->GetInputTensorInfo(netId, it.first), it.second.data())});
152  }
153  OutputTensors outputTensors;
154  outputTensors.reserve(expectedOutputData.size());
155  std::map<int, std::vector<TOutput>> outputStorage;
156  for (auto&& it : expectedOutputData)
157  {
158  std::vector<TOutput> out(it.second.size());
159  outputStorage.emplace(it.first, out);
160  outputTensors.push_back({it.first,
161  Tensor(runtime->GetOutputTensorInfo(netId, it.first),
162  outputStorage.at(it.first).data())});
163  }
164 
165  // Does the inference.
166  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
167 
168  // Checks the results.
169  for (auto&& it : expectedOutputData)
170  {
171  std::vector<TOutput> out = outputStorage.at(it.first);
172  for (unsigned int i = 0; i < out.size(); ++i)
173  {
174  CHECK_MESSAGE(Compare<ArmnnOType>(it.second[i], out[i], tolerance) == true,
175  "Actual output: " << out[i] << ". Expected output:" << it.second[i]);
176 
177  }
178  }
179 }
180 
181 inline void ImportNonAlignedInputPointerTest(std::vector<BackendId> backends)
182 {
183  using namespace armnn;
184 
185  // Create runtime in which test will run
186  IRuntime::CreationOptions options;
187  IRuntimePtr runtime(armnn::IRuntime::Create(options));
188 
189  // build up the structure of the network
190  INetworkPtr net(INetwork::Create());
191 
192  IConnectableLayer* input = net->AddInputLayer(0);
193 
194  ActivationDescriptor descriptor;
195  descriptor.m_Function = ActivationFunction::Square;
196  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
197 
198  IConnectableLayer* output = net->AddOutputLayer(0);
199 
200  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
201  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
202 
203  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
204  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
205 
206  // Optimize the network
207  OptimizerOptions optimizedOptions;
208  optimizedOptions.m_ImportEnabled = true;
209  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
210  CHECK(optNet);
211 
212  // Loads it into the runtime.
213  NetworkId netId;
214  std::string ignoredErrorMessage;
215  // Enable Importing
216  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
217  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
218 
219  // Creates structures for input & output
220  std::vector<float> inputData
221  {
222  1.0f, 2.0f, 3.0f, 4.0f
223  };
224 
225  // Misaligned input
226  float* misalignedInputData = reinterpret_cast<float*>(reinterpret_cast<char*>(inputData.data()) + 1);
227 
228  std::vector<float> outputData(4);
229 
230  // Aligned output
231  float* alignedOutputData = outputData.data();
232 
233  InputTensors inputTensors
234  {
235  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputData)},
236  };
237  OutputTensors outputTensors
238  {
239  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), alignedOutputData)}
240  };
241 
242  runtime->GetProfiler(netId)->EnableProfiling(true);
243 
244  // Do the inference and expect it to fail with a MemoryImportException
245  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
246 }
247 
248 inline void ExportNonAlignedOutputPointerTest(std::vector<BackendId> backends)
249 {
250  using namespace armnn;
251 
252  // Create runtime in which test will run
253  IRuntime::CreationOptions options;
254  IRuntimePtr runtime(armnn::IRuntime::Create(options));
255 
256  // build up the structure of the network
257  INetworkPtr net(INetwork::Create());
258 
259  IConnectableLayer* input = net->AddInputLayer(0);
260 
261  ActivationDescriptor descriptor;
262  descriptor.m_Function = ActivationFunction::Square;
263  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
264 
265  IConnectableLayer* output = net->AddOutputLayer(0);
266 
267  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
268  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
269 
270  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
271  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
272 
273  // Optimize the network
274  OptimizerOptions optimizedOptions;
275  optimizedOptions.m_ImportEnabled = true;
276  optimizedOptions.m_ExportEnabled = true;
277  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
278  CHECK(optNet);
279 
280  // Loads it into the runtime.
281  NetworkId netId;
282  std::string ignoredErrorMessage;
283  // Enable Importing and Exporting
284  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
285  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
286 
287  // Creates structures for input & output
288  std::vector<float> inputData
289  {
290  1.0f, 2.0f, 3.0f, 4.0f, 5.0f
291  };
292 
293  // Aligned input
294  float* alignedInputData = inputData.data();
295 
296  std::vector<float> outputData(5);
297 
298  // Misaligned output
299  float* misalignedOutputData = reinterpret_cast<float*>(reinterpret_cast<char*>(outputData.data()) + 1);
300 
301  InputTensors inputTensors
302  {
303  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), alignedInputData)},
304  };
305  OutputTensors outputTensors
306  {
307  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputData)}
308  };
309 
310  // Do the inference and expect it to fail with a MemoryExportException
311  if (backends[0] == Compute::CpuAcc)
312  {
313  // For CpuAcc the NeonTensorHandle will throw its own exception on misaligned memory
314  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryImportException);
315  }
316  else
317  {
318  CHECK_THROWS_AS(runtime->EnqueueWorkload(netId, inputTensors, outputTensors), MemoryExportException);
319  }
320 }
321 
322 inline void ImportAlignedPointerTest(std::vector<BackendId> backends)
323 {
324  using namespace armnn;
325 
326  // Create runtime in which test will run
327  IRuntime::CreationOptions options;
328  IRuntimePtr runtime(armnn::IRuntime::Create(options));
329 
330  // build up the structure of the network
331  INetworkPtr net(INetwork::Create());
332 
333  IConnectableLayer* input = net->AddInputLayer(0);
334 
335  ActivationDescriptor descriptor;
336  descriptor.m_Function = ActivationFunction::Square;
337  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
338 
339  IConnectableLayer* output = net->AddOutputLayer(0);
340 
341  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
342  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
343 
344  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
345  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
346 
347  // Optimize the network
348  OptimizerOptions optimizedOptions;
349  optimizedOptions.m_ImportEnabled = true;
350  optimizedOptions.m_ExportEnabled = true;
351  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
352  CHECK(optNet);
353 
354  // Loads it into the runtime.
355  NetworkId netId;
356  std::string ignoredErrorMessage;
357  // Enable Importing
358  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
359  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
360 
361  // Creates structures for input & output
362  std::vector<float> inputData
363  {
364  1.0f, 2.0f, 3.0f, 4.0f
365  };
366 
367  std::vector<float> outputData(4);
368 
369  std::vector<float> expectedOutput
370  {
371  1.0f, 4.0f, 9.0f, 16.0f
372  };
373 
374  InputTensors inputTensors
375  {
376  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
377  };
378  OutputTensors outputTensors
379  {
380  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
381  };
382 
383  runtime->GetProfiler(netId)->EnableProfiling(true);
384 
385  // Do the inference
386  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
387 
388  // Retrieve the Profiler.Print() output to get the workload execution
389  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
390  std::stringstream ss;
391  profilerManager.GetProfiler()->Print(ss);
392  std::string dump = ss.str();
393 
394  // Contains ActivationWorkload
395  std::size_t found = dump.find("ActivationWorkload");
396  CHECK(found != std::string::npos);
397 
398  // Contains SyncMemGeneric
399  found = dump.find("SyncMemGeneric");
400  CHECK(found != std::string::npos);
401 
402  // Does not contain CopyMemGeneric
403  found = dump.find("CopyMemGeneric");
404  CHECK(found == std::string::npos);
405 
406  // Check output is as expected
407  CHECK(outputData == expectedOutput);
408 }
409 
410 inline void ImportOnlyWorkload(std::vector<BackendId> backends)
411 {
412  using namespace armnn;
413 
414  IRuntime::CreationOptions options;
415  IRuntimePtr runtime(IRuntime::Create(options));
416 
417  // Builds up the structure of the network.
418  INetworkPtr net(INetwork::Create());
419 
420  IConnectableLayer* input = net->AddInputLayer(0);
421 
422  ActivationDescriptor descriptor;
423  descriptor.m_Function = ActivationFunction::Square;
424  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
425 
426  IConnectableLayer* output = net->AddOutputLayer(0);
427 
428  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
429  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
430 
431  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
432  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
433 
434  // optimize the network
435  OptimizerOptions optimizedOptions;
436  optimizedOptions.m_ImportEnabled = true;
437  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
438 
439  INFO("Load Network");
440  // Load it into the runtime. It should pass.
441  NetworkId netId;
442  std::string ignoredErrorMessage;
443 
444  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Undefined);
445 
446  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
447  == Status::Success);
448 
449  INFO("Generate Data");
450  // Creates structures for input & output
451  std::vector<float> inputData
452  {
453  1.0f, 2.0f, 3.0f, 4.0f
454  };
455 
456  std::vector<float> outputData(4);
457 
458  std::vector<float> expectedOutput
459  {
460  1.0f, 4.0f, 9.0f, 16.0f
461  };
462 
463  INFO("Create Inference");
464 
465  InputTensors inputTensors
466  {
467  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
468  };
469  OutputTensors outputTensors
470  {
471  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
472  };
473 
474  INFO("Get Profiler");
475  runtime->GetProfiler(netId)->EnableProfiling(true);
476 
477  INFO("Run Inference");
478  // Do the inference
479  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
480 
481  INFO("Print Profiler");
482  // Retrieve the Profiler.Print() output to get the workload execution
483  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
484  std::stringstream ss;
485  profilerManager.GetProfiler()->Print(ss);
486  std::string dump = ss.str();
487 
488  // Check there are no SyncMemGeneric workloads as we didn't export
489  INFO("Find SyncMemGeneric");
490  int count = SubStringCounter(dump, "SyncMemGeneric");
491  CHECK(count == 0);
492 
493  // Should only be 1 CopyMemGeneric for the output as we imported
494  INFO("Find CopyMemGeneric");
495  count = SubStringCounter(dump, "CopyMemGeneric");
496  CHECK(count == 1);
497 
498  // Check the output is correct
499  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
500 }
501 
502 inline void ExportOnlyWorkload(std::vector<BackendId> backends)
503 {
504  using namespace armnn;
505 
506  IRuntime::CreationOptions options;
507  IRuntimePtr runtime(IRuntime::Create(options));
508 
509  // Builds up the structure of the network.
510  INetworkPtr net(INetwork::Create());
511 
512  IConnectableLayer* input = net->AddInputLayer(0);
513 
514  ActivationDescriptor descriptor;
515  descriptor.m_Function = ActivationFunction::Square;
516  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
517 
518  IConnectableLayer* output = net->AddOutputLayer(0);
519 
520  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
521  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
522 
523  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
524  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
525 
526  // optimize the network
527  OptimizerOptions optimizedOptions;
528  optimizedOptions.m_ExportEnabled = true;
529  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
530 
531  INFO("Load Network");
532  // Load it into the runtime. It should pass.
533  NetworkId netId;
534  std::string ignoredErrorMessage;
535  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Malloc);
536  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
537  == Status::Success);
538 
539  INFO("Generate Data");
540  // Creates structures for input & output
541  std::vector<float> inputData
542  {
543  1.0f, 2.0f, 3.0f, 4.0f
544  };
545 
546  std::vector<float> outputData(4);
547 
548  std::vector<float> expectedOutput
549  {
550  1.0f, 4.0f, 9.0f, 16.0f
551  };
552 
553  INFO("Create Inference");
554 
555  InputTensors inputTensors
556  {
557  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
558  };
559  OutputTensors outputTensors
560  {
561  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
562  };
563 
564  INFO("Get Profiler");
565  runtime->GetProfiler(netId)->EnableProfiling(true);
566 
567  INFO("Run Inference");
568  // Do the inference
569  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
570 
571  INFO("Print Profiler");
572  // Retrieve the Profiler.Print() output to get the workload execution
573  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
574  std::stringstream ss;
575  profilerManager.GetProfiler()->Print(ss);
576  std::string dump = ss.str();
577 
578  // Check there is a SyncMemGeneric workload as we exported
579  INFO("Find SyncMemGeneric");
580  int count = SubStringCounter(dump, "SyncMemGeneric");
581  CHECK(count == 1);
582 
583  // Should be 1 CopyMemGeneric for the output as we did not import
584  INFO("Find CopyMemGeneric");
585  count = SubStringCounter(dump, "CopyMemGeneric");
586  CHECK(count == 1);
587 
588  // Check the output is correct
589  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
590 }
591 
592 inline void ImportAndExportWorkload(std::vector<BackendId> backends)
593 {
594  using namespace armnn;
595 
596  IRuntime::CreationOptions options;
597  IRuntimePtr runtime(IRuntime::Create(options));
598 
599  // Builds up the structure of the network.
600  INetworkPtr net(INetwork::Create());
601 
602  IConnectableLayer* input = net->AddInputLayer(0);
603 
604  ActivationDescriptor descriptor;
605  descriptor.m_Function = ActivationFunction::Square;
606  IConnectableLayer* pooling = net->AddActivationLayer(descriptor);
607 
608  IConnectableLayer* output = net->AddOutputLayer(0);
609 
610  input->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
611  pooling->GetOutputSlot(0).Connect(output->GetInputSlot(0));
612 
613  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
614  pooling->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
615 
616  OptimizerOptions optimizedOptions;
617  optimizedOptions.m_ImportEnabled = true;
618  optimizedOptions.m_ExportEnabled = true;
619  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
620 
621  INFO("Load Network");
622  // Load it into the runtime. It should pass.
623  NetworkId netId;
624  std::string ignoredErrorMessage;
625 
626  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
627 
628  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
629  == Status::Success);
630 
631  INFO("Generate Data");
632  // Creates structures for input & output
633  std::vector<float> inputData
634  {
635  1.0f, 2.0f, 3.0f, 4.0f
636  };
637 
638  std::vector<float> outputData(4);
639 
640  std::vector<float> expectedOutput
641  {
642  1.0f, 4.0f, 9.0f, 16.0f
643  };
644 
645  INFO("Create inference");
646 
647  InputTensors inputTensors
648  {
649  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
650  };
651  OutputTensors outputTensors
652  {
653  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
654  };
655 
656  INFO("Get Profiler");
657  runtime->GetProfiler(netId)->EnableProfiling(true);
658 
659  INFO("Run Inference");
660  // Do the inference
661  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
662 
663  INFO("Print Profiler");
664  // Retrieve the Profiler.Print() output to get the workload execution
665  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
666  std::stringstream ss;
667  profilerManager.GetProfiler()->Print(ss);
668  std::string dump = ss.str();
669 
670  // Check there is a SyncMemGeneric workload as we exported
671  INFO("Find SyncMemGeneric");
672  int count = SubStringCounter(dump, "SyncMemGeneric");
673  CHECK(count == 1);
674 
675  // Shouldn't be any CopyMemGeneric workloads
676  INFO("Find CopyMemGeneric");
677  count = SubStringCounter(dump, "CopyMemGeneric");
678  CHECK(count == 0);
679 
680  // Check the output is correct
681  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
682 }
683 
684 inline void ExportOutputWithSeveralOutputSlotConnectionsTest(std::vector<BackendId> backends)
685 {
686  using namespace armnn;
687 
688  // Create runtime in which test will run
689  IRuntime::CreationOptions options;
690  IRuntimePtr runtime(armnn::IRuntime::Create(options));
691 
692  // build up the structure of the network
693  INetworkPtr net(INetwork::Create());
694 
695  IConnectableLayer* input = net->AddInputLayer(0);
696 
697  ActivationDescriptor descriptor;
698  descriptor.m_Function = ActivationFunction::Square;
699  IConnectableLayer* activation = net->AddActivationLayer(descriptor);
700 
701  IConnectableLayer* output0 = net->AddOutputLayer(0);
702  IConnectableLayer* output1 = net->AddOutputLayer(1);
703 
704  input->GetOutputSlot(0).Connect(activation->GetInputSlot(0));
705  activation->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
706  activation->GetOutputSlot(0).Connect(output1->GetInputSlot(0));
707 
708  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32, 0.0f, 0, true));
709  activation->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 4, 1 }, DataType::Float32));
710 
711  // Optimize the network
712  OptimizerOptions optimizedOptions;
713  optimizedOptions.m_ImportEnabled = true;
714  optimizedOptions.m_ExportEnabled = true;
715  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec(), optimizedOptions);
716 
717  // Loads it into the runtime.
718  NetworkId netId;
719  std::string ignoredErrorMessage;
720  // Enable Importing
721  INetworkProperties networkProperties(false, MemorySource::Malloc, MemorySource::Malloc);
722  runtime->LoadNetwork(netId, std::move(optNet), ignoredErrorMessage, networkProperties);
723 
724  // Creates structures for input & output
725  std::vector<float> inputData
726  {
727  1.0f, 2.0f, 3.0f, 4.0f
728  };
729 
730  std::vector<float> outputData0(4);
731  std::vector<float> outputData1(4);
732 
733  std::vector<float> expectedOutput
734  {
735  1.0f, 4.0f, 9.0f, 16.0f
736  };
737 
738  InputTensors inputTensors
739  {
740  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
741  };
742  OutputTensors outputTensors
743  {
744  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData0.data())},
745  {1,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 1), outputData1.data())}
746  };
747 
748  // The result of the inference is not important; what matters is that the output cannot be exported,
749  // so CopyMemGeneric workloads are used instead of SyncMemGeneric.
750  runtime->GetProfiler(netId)->EnableProfiling(true);
751 
752  // Do the inference
753  runtime->EnqueueWorkload(netId, inputTensors, outputTensors);
754 
755  // Retrieve the Profiler.Print() output to get the workload execution
756  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
757  std::stringstream ss;
758  profilerManager.GetProfiler()->Print(ss);
759  std::string dump = ss.str();
760 
761  std::size_t found = std::string::npos;
762 
763  if (backends[0] == Compute::CpuRef)
764  {
765  found = dump.find("RefActivationWorkload");
766  }
767  else if (backends[0] == Compute::CpuAcc)
768  {
769  found = dump.find("NeonActivationWorkload");
770  }
771  else if (backends[0] == Compute::GpuAcc)
772  {
773  found = dump.find("ClActivationWorkload");
774  }
775 
776  CHECK(found != std::string::npos);
777  // No contains SyncMemGeneric
778  found = dump.find("SyncMemGeneric");
779  CHECK(found == std::string::npos);
780  // Contains CopyMemGeneric
781  found = dump.find("CopyMemGeneric");
782  CHECK(found != std::string::npos);
783 
784  // Check that the outputs are correct
785  CHECK(std::equal(outputData0.begin(), outputData0.end(),
786  expectedOutput.begin(), expectedOutput.end()));
787  CHECK(std::equal(outputData1.begin(), outputData1.end(),
788  expectedOutput.begin(), expectedOutput.end()));
789 }
790 
791 inline void StridedSliceInvalidSliceEndToEndTest(std::vector<BackendId> backends)
792 {
793  using namespace armnn;
794 
795  // Create runtime in which test will run
796  IRuntime::CreationOptions options;
797  IRuntimePtr runtime(armnn::IRuntime::Create(options));
798 
799  // build up the structure of the network
800  INetworkPtr net(INetwork::Create());
801 
802  IConnectableLayer* input = net->AddInputLayer(0);
803 
804  // Configure a strided slice with a stride the same size as the input but with a ShrinkAxisMask on the first
805  // dim of the output to make it too small to hold the specified slice.
806  StridedSliceDescriptor descriptor;
807  descriptor.m_Begin = {0, 0};
808  descriptor.m_End = {2, 3};
809  descriptor.m_Stride = {1, 1};
810  descriptor.m_BeginMask = 0;
811  descriptor.m_EndMask = 0;
812  descriptor.m_ShrinkAxisMask = 1;
813  IConnectableLayer* stridedSlice = net->AddStridedSliceLayer(descriptor);
814 
815  IConnectableLayer* output0 = net->AddOutputLayer(0);
816 
817  input->GetOutputSlot(0).Connect(stridedSlice->GetInputSlot(0));
818  stridedSlice->GetOutputSlot(0).Connect(output0->GetInputSlot(0));
819 
820  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 2, 3 }, DataType::Float32, 0.0f, 0, true));
821  stridedSlice->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 3 }, DataType::Float32));
822 
823  // Attempt to optimize the network and check that the correct exception is thrown
824  CHECK_THROWS_AS(Optimize(*net, backends, runtime->GetDeviceSpec()), armnn::LayerValidationException);
825 }
826 
827 inline void ForceImportWithAlignedBuffersEndToEndTest(std::vector<BackendId> backends)
828 {
829  /**
830  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
831  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
832  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
833  * In this case all inputs and outputs should be imported.
834  */
835  using namespace armnn;
836  IRuntime::CreationOptions options;
837  IRuntimePtr runtime(IRuntime::Create(options));
838 
839  // Builds up the structure of the network.
840  INetworkPtr net(INetwork::Create());
841  IConnectableLayer* input = net->AddInputLayer(0);
842  ActivationDescriptor descriptor;
843  descriptor.m_Function = ActivationFunction::Square;
844  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
845  IConnectableLayer* output = net->AddOutputLayer(0);
846  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
847  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
848  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
849  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
850  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
851  INFO("Load Network");
852 
853  // Load it into the runtime. It should pass.
854  NetworkId netId;
855  std::string ignoredErrorMessage;
856  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
857  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
858  == Status::Success);
859  INFO("Generate Data");
860 
861  // Creates structures for input & output
862  std::vector<float> inputData
863  {
864  1.0f, 2.0f, 3.0f, 4.0f
865  };
866  std::vector<float> outputData(4);
867  std::vector<float> expectedOutput
868  {
869  1.0f, 4.0f, 9.0f, 16.0f
870  };
871 
872  // Check our input and output pointers are actually aligned
873  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
874  CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
875  CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
876 
877  INFO("Create Inference");
878  InputTensors inputTensors
879  {
880  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
881  };
882  OutputTensors outputTensors
883  {
884  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
885  };
886 
887  runtime->GetProfiler(netId)->EnableProfiling(true);
888  std::vector<ImportedInputId> importedInputIds =
889  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
890  CHECK(importedInputIds.size() == 1);
891  std::vector<ImportedOutputId> importedOutputIds =
892  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
893  CHECK(importedOutputIds.size() == 1);
894  // Do the inference and force the import as the memory is aligned.
895  runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
896 
897  // Retrieve the Profiler.Print() output to get the workload execution
898  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
899  std::stringstream ss;
900  profilerManager.GetProfiler()->Print(ss);
901  std::string dump = ss.str();
902 
903  if (backends[0] == Compute::CpuAcc)
904  {
905  // Reconfigure has not been implemented for CpuAcc, so it will always copy; this will break whenever
906  // reconfigure is implemented
907  int count = SubStringCounter(dump, "SyncMemGeneric");
908  CHECK(count == 0);
909  // Should be 2 CopyMemGeneric workloads
910  count = SubStringCounter(dump, "CopyMemGeneric");
911  CHECK(count == 2);
912  }
913  else
914  {
915  // Check there is a SyncMemGeneric workload as we exported
916  int count = SubStringCounter(dump, "SyncMemGeneric");
917  CHECK(count == 1);
918  // Shouldn't be any CopyMemGeneric workloads
919  count = SubStringCounter(dump, "CopyMemGeneric");
920  CHECK(count == 0);
921  }
922  // Check the output is correct
923  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
924 }
925 
926 inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
927 {
928  /**
929  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
930  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
931  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
932  * In this case only the output should be imported.
933  */
934  using namespace armnn;
935 
936  IRuntime::CreationOptions options;
937  IRuntimePtr runtime(IRuntime::Create(options));
938 
939  // Builds up the structure of the network.
940  INetworkPtr net(INetwork::Create());
941  IConnectableLayer* input = net->AddInputLayer(0);
942 
943  ActivationDescriptor descriptor;
944  descriptor.m_Function = ActivationFunction::Square;
945  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
946 
947  IConnectableLayer* output = net->AddOutputLayer(0);
948 
949  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
950  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
951  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
952  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
953 
954  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
955  INFO("Load Network");
956  // Load it into the runtime. It should pass.
957  NetworkId netId;
958  std::string ignoredErrorMessage;
959  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
960  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
961  == Status::Success);
962  INFO("Generate Data");
963 
964  // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
965  // this guarantees that the resulting buffer is misaligned and thus should always be copied.
966  auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
967 
968  float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
969 
970  // Check if our pointer is truly misaligned
971  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
972  CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
973 
974  std::vector<float> inputData
975  {
976  1.0f, 2.0f, 3.0f, 4.0f
977  };
978 
979  std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
980 
981  std::vector<float> outputData(4);
982  // Check our output buffer is aligned
983  CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
984 
985  std::vector<float> expectedOutput
986  {
987  1.0f, 4.0f, 9.0f, 16.0f
988  };
989 
990  INFO("Create Inference");
991  InputTensors inputTensors
992  {
993  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedMemPtr)},
994  };
995  OutputTensors outputTensors
996  {
997  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
998  };
999  runtime->GetProfiler(netId)->EnableProfiling(true);
1000  std::vector<ImportedInputId> importedInputIds =
1001  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1002  // We expect the import to have failed.
1003  CHECK(importedInputIds.size() == 0);
1004  std::vector<ImportedOutputId> importedOutputIds =
1005  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1006  CHECK(importedOutputIds.size() == 1);
1007 
1008  // Do the inference and force the import as the memory is misaligned.
1009  runtime->EnqueueWorkload(netId, inputTensors, OutputTensors(), importedInputIds, importedOutputIds);
1010 
1011  // Retrieve the Profiler.Print() output to get the workload execution
1012  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1013  std::stringstream ss;
1014  profilerManager.GetProfiler()->Print(ss);
1015  std::string dump = ss.str();
1016 
1017  // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1018  // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1019  // for imports/copies. Only that the output is correct.
1020  if (backends[0] != Compute::GpuAcc)
1021  {
1022  if (backends[0] == Compute::CpuAcc)
1023  {
1024  // Reconfigure has not been implemented for CpuAcc, so it will always copy; this will break whenever
1025  // reconfigure is implemented
1026  // We should get 0 SyncMemGeneric for the Output
1027  int count = SubStringCounter(dump, "SyncMemGeneric");
1028  CHECK(count == 0);
1029  // Should be 2 CopyMemGeneric as we copied the input
1030  count = SubStringCounter(dump, "CopyMemGeneric");
1031  CHECK(count == 2);
1032  }
1033  else
1034  {
1035  // We should get 1 SyncMemGeneric for the Output
1036  int count = SubStringCounter(dump, "SyncMemGeneric");
1037  CHECK(count == 1);
1038  // Should only be 1 CopyMemGeneric as we copied the input
1039  count = SubStringCounter(dump, "CopyMemGeneric");
1040  CHECK(count == 1);
1041  }
1042  }
1043  // Check the output is correct
1044  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1045  std::free(memPtr);
1046 }
1047 
1048 inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1049 {
1050  /**
1051  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
1052  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
1053  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
1054  * In this case only the input should be imported.
1055  */
1056  using namespace armnn;
1057 
1058  IRuntime::CreationOptions options;
1059  IRuntimePtr runtime(IRuntime::Create(options));
1060 
1061  // Builds up the structure of the network.
1062  INetworkPtr net(INetwork::Create());
1063  IConnectableLayer* input = net->AddInputLayer(0);
1064 
1065  ActivationDescriptor descriptor;
1066  descriptor.m_Function = ActivationFunction::Square;
1067  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1068 
1069  IConnectableLayer* output = net->AddOutputLayer(0);
1070 
1071  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1072  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1073  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1074  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1075 
1076  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1077  INFO("Load Network");
1078  // Load it into the runtime. It should pass.
1079  NetworkId netId;
1080  std::string ignoredErrorMessage;
1081  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1082  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1083  == Status::Success);
1084  INFO("Generate Data");
1085 
1086  // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
1087  // this guarantees that the resulting buffer is misaligned and thus should always be copied.
1088  auto memPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1089 
1090  float* misalignedMemPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(memPtr) + 1);
1091 
1092  // Check if our pointer is truly misaligned
1093  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1094  CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
1095 
1096  // Creates structures for input & output
1097  std::vector<float> inputData
1098  {
1099  1.0f, 2.0f, 3.0f, 4.0f
1100  };
1101 
1102  // Check our input buffer is aligned
1103  CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1104  std::vector<float> expectedOutput
1105  {
1106  1.0f, 4.0f, 9.0f, 16.0f
1107  };
1108 
1109  INFO("Create Inference");
1110  InputTensors inputTensors
1111  {
1112  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1113  };
1114  OutputTensors outputTensors
1115  {
1116  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedMemPtr)}
1117  };
1118  runtime->GetProfiler(netId)->EnableProfiling(true);
1119  std::vector<ImportedInputId> importedInputIds =
1120  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1121  CHECK(importedInputIds.size() == 1);
1122  // We expect this to fail.
1123  std::vector<ImportedOutputId> importedOutputIds =
1124  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1125  CHECK(importedOutputIds.size() == 0);
1126 
1127  // Even if importing the output failed we still expect to be able to get it to work.
1128  runtime->EnqueueWorkload(netId, InputTensors(), outputTensors, importedInputIds, importedOutputIds);
1129 
1130  // Retrieve the Profiler.Print() output to get the workload execution
1131  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1132  std::stringstream ss;
1133  profilerManager.GetProfiler()->Print(ss);
1134  std::string dump = ss.str();
1135 
1136  // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1137  // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1138  // for imports/copies. Only that the output is correct.
1139  if (backends[0] != Compute::GpuAcc)
1140  {
1141  // Even though we Imported the Input we still shouldn't have a SyncMemGeneric
1142  int count = SubStringCounter(dump, "SyncMemGeneric");
1143  CHECK(count == 0);
1144  // Should only be 1 CopyMemGeneric as we copied the input
1145  count = SubStringCounter(dump, "CopyMemGeneric");
1146  if (backends[0] == Compute::CpuAcc)
1147  {
1148  // Reconfigure has not been implemented for CpuAcc, so it will always copy; this will break whenever
1149  // reconfigure is implemented
1150  CHECK(count == 2);
1151  }
1152  else
1153  {
1154  CHECK(count == 1);
1155  }
1156  // Check the output is correct
1157  }
1158  unsigned int index = 0;
1159  std::vector<float> outputData(expectedOutput.size(), 0);
1160  std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
1161  for (auto outputValue : expectedOutput)
1162  {
1163  CHECK(outputValue == outputData[index]);
1164  ++index;
1165  }
1166  std::free(memPtr);
1167 }
1168 
1169 inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
1170 {
1171  /**
1172  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
1173  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
1174  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
1175  * In this case all inputs and outputs should be copied.
1176  */
1177  using namespace armnn;
1178 
1179  IRuntime::CreationOptions options;
1180  IRuntimePtr runtime(IRuntime::Create(options));
1181 
1182  // Builds up the structure of the network.
1183  INetworkPtr net(INetwork::Create());
1184  IConnectableLayer* input = net->AddInputLayer(0);
1185 
1186  ActivationDescriptor descriptor;
1187  descriptor.m_Function = ActivationFunction::Square;
1188  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1189 
1190  IConnectableLayer* output = net->AddOutputLayer(0);
1191 
1192  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1193  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1194  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1195  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1196 
1197  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1198  INFO("Load Network");
1199  // Load it into the runtime. It should pass.
1200  NetworkId netId;
1201  std::string ignoredErrorMessage;
1202  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1203  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1204  == Status::Success);
1205  INFO("Generate Data");
1206 
1207  // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
1208  // this guarantees that the resulting buffer is misaligned and thus should always be copied.
1209  auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1210  float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1211 
1212  // Check if our pointer is truly misaligned
1213  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1214  CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1215  std::vector<float> inputData
1216  {
1217  1.0f, 2.0f, 3.0f, 4.0f
1218  };
1219  std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
1220 
1221  auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1222  float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1223 
1224  // Check if our pointer is truly misaligned
1225  CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1226 
1227  std::vector<float> expectedOutput
1228  {
1229  1.0f, 4.0f, 9.0f, 16.0f
1230  };
1231 
1232  INFO("Create Inference");
1233  InputTensors inputTensors
1234  {
1235  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1236  };
1237  OutputTensors outputTensors
1238  {
1239  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1240  };
1241  runtime->GetProfiler(netId)->EnableProfiling(true);
1242  std::vector<ImportedInputId> importedInputIds =
1243  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1244  // Import should have failed.
1245  CHECK(importedInputIds.size() == 0);
1246  std::vector<ImportedOutputId> importedOutputIds =
1247  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1248  // Import should have failed.
1249  CHECK(importedOutputIds.size() == 0);
1250 
1251  // Do the inference and force the import as the memory is misaligned.
1252  runtime->EnqueueWorkload(netId, inputTensors, outputTensors, importedInputIds, importedOutputIds);
1253 
1254  // Retrieve the Profiler.Print() output to get the workload execution
1255  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1256  std::stringstream ss;
1257  profilerManager.GetProfiler()->Print(ss);
1258  std::string dump = ss.str();
1259 
1260  // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1261  // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1262  // for imports/copies. Only that the output is correct.
1263  if (backends[0] != Compute::GpuAcc)
1264  {
1265  // We can only copy so there should be no SyncMemGeneric
1266  int count = SubStringCounter(dump, "SyncMemGeneric");
1267  CHECK(count == 0);
1268  // Should only be CopyMemGeneric workloads as we copied all buffers
1269  count = SubStringCounter(dump, "CopyMemGeneric");
1270  CHECK(count == 2);
1271  }
1272  // Check the output is correct
1273  unsigned int index = 0;
1274  std::vector<float> outputData(expectedOutput.size(), 0);
1275  std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
1276  for (auto expectedValue : expectedOutput)
1277  {
1278  CHECK(expectedValue == outputData[index]);
1279  ++index;
1280  }
1281  std::free(inputMemPtr);
1282  std::free(outputMemPtr);
1283 }
1284 
1285 inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
1286 {
1287  /**
1288  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
1289  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
1290  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
1291  * Here we create some aligned buffers, import them into the network, and validate the output and the number of
1292  * SyncMemGeneric/CopyMemGeneric workloads. Then we run the same network again with misaligned buffers to make
1293  * sure it falls back to copying correctly.
1294  */
1295  using namespace armnn;
1296 
1297  IRuntime::CreationOptions options;
1298  IRuntimePtr runtime(IRuntime::Create(options));
1299 
1300  // Builds up the structure of the network.
1301  INetworkPtr net(INetwork::Create());
1302  IConnectableLayer* input = net->AddInputLayer(0);
1303 
1304  ActivationDescriptor descriptor;
1305  descriptor.m_Function = ActivationFunction::Square;
1306  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1307 
1308  IConnectableLayer* output = net->AddOutputLayer(0);
1309 
1310  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1311  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1312  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1313  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1314 
1315  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1316  INFO("Load Network");
1317  // Load it into the runtime. It should pass.
1318  NetworkId netId;
1319  std::string ignoredErrorMessage;
1320  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1321  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1322  == Status::Success);
1323  INFO("Generate Data");
1324 
1325  // Creates structures for input & output
1326  std::vector<float> inputData
1327  {
1328  1.0f, 2.0f, 3.0f, 4.0f
1329  };
1330  std::vector<float> outputData(4);
1331  std::vector<float> expectedOutput
1332  {
1333  1.0f, 4.0f, 9.0f, 16.0f
1334  };
1335 
1336  // Check our input and output pointers are actually aligned
1337  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1338  CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1339  CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1340 
1341  INFO("Create Inference");
1342  InputTensors inputTensors
1343  {
1344  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1345  };
1346  OutputTensors outputTensors
1347  {
1348  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1349  };
1350 
1351  runtime->GetProfiler(netId)->EnableProfiling(true);
1352  std::vector<ImportedInputId> importedInputIds =
1353  runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1354  CHECK(importedInputIds.size() == 1);
1355  std::vector<ImportedOutputId> importedOutputIds =
1356  runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1357  CHECK(importedOutputIds.size() == 1);
1358  // Do the inference and force the import as the memory is aligned.
1359  runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
1360 
1361  // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1362  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1363  std::stringstream ss;
1364  profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1365  std::string dump = ss.str();
1366 
1367  if (backends[0] == Compute::CpuAcc)
1368  {
1369  // Reconfigure has not been implemented for CpuAcc, so it will always copy; this will break whenever
1370  // reconfigure is implemented
1371  int count = SubStringCounter(dump, "SyncMemGeneric");
1372  CHECK(count == 0);
1373  // Should be 2 CopyMemGeneric workloads
1374  count = SubStringCounter(dump, "CopyMemGeneric");
1375  CHECK(count >= 1);
1376  }
1377  else
1378  {
1379  // Check there is at least 1 SyncMemGeneric workload as we exported
1380  int count = SubStringCounter(dump, "SyncMemGeneric");
1381  CHECK(count >= 1);
1382  // Shouldn't be any CopyMemGeneric workloads
1383  count = SubStringCounter(dump, "CopyMemGeneric");
1384  CHECK(count == 0);
1385  }
1386  // Check the output is correct
1387  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1388 
1389  // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
1390  // this guarantees that the resulting buffer is misaligned and thus should always be copied.
1391  auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1392  float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1393 
1394  // Check if our pointer is truly misaligned
1395  CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1396 
1397  std::vector<float> inputValues
1398  {
1399  2.0f, 3.0f, 4.0f, 5.0f
1400  };
1401 
1402  std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
1403 
1404  auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1405  float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1406 
1407  // Check if our pointer is truly misaligned
1408  CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1409 
1410  std::vector<float> expectedMisalignedOutput
1411  {
1412  4.0f, 9.0f, 16.0f, 25.0f
1413  };
1414 
1415  INFO("Create Second Inference");
1416  InputTensors inputTensorsMisaligned
1417  {
1418  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1419  };
1420  OutputTensors outputTensorsMisaligned
1421  {
1422  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1423  };
1424  importedInputIds = runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1425  // Import should fail.
1426  CHECK(importedInputIds.size() == 0);
1427  importedOutputIds = runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1428  // Import should fail.
1429  CHECK(importedOutputIds.size() == 0);
1430 
1431  // Do the inference and force the import as the memory is misaligned.
1432  runtime->EnqueueWorkload(netId,
1433  inputTensorsMisaligned,
1434  outputTensorsMisaligned,
1435  importedInputIds,
1436  importedOutputIds);
1437 
1438  // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution
1439  // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled
1440  profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1441  dump = ss.str();
1442 
1443  // GpuAcc is a different case to CpuRef and CpuAcc, it doesn't use the buffer directly but instead maps it to a
1444  // new set of addresses within Gpu Memory. This will almost always be auto-aligned, so we don't need to check
1445  // for imports/copies. Only that the output is correct.
1446  if (backends[0] != Compute::GpuAcc)
1447  {
1448  // The SyncMemGeneric will still be in the profiling log from the first inference
1449  int count = SubStringCounter(dump, "SyncMemGeneric");
1450  CHECK(count >= 1);
1451  // We should now see CopyMemGeneric workloads as we copied all buffers
1452  count = SubStringCounter(dump, "CopyMemGeneric");
1453  CHECK(count >= 1);
1454  }
1455  // Check the output is correct
1456  unsigned int index = 0;
1457  std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
1458  std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
1459  for (auto outputValue : expectedMisalignedOutput)
1460  {
1461  CHECK(outputValue == alignedOutputData[index]);
1462  ++index;
1463  }
1464  // Clean up to avoid interfering with other tests
1465  runtime->UnloadNetwork(netId);
1466  std::free(inputMemPtr);
1467  std::free(outputMemPtr);
1468 }
1469 
1470 
1471 inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
1472 {
1473  /**
1474  * This test is similar to the Import tests above: we create a network with a square function, pass in a vector
1475  * of 4 floats, square them and validate the output. We then check the profiling logs to see whether input/output
1476  * tensors are copied (CopyMemGeneric) or imported (SyncMemGeneric).
1477  * Here we create some misaligned buffers, copy them into the network, and validate the output and the number of
1478  * SyncMemGeneric/CopyMemGeneric workloads. Then we run the same network again with aligned buffers to make sure
1479  * it switches to importing correctly.
1480  */
1481  using namespace armnn;
1482 
1483  IRuntime::CreationOptions options;
1484  IRuntimePtr runtime(IRuntime::Create(options));
1485 
1486  // Builds up the structure of the network.
1487  INetworkPtr net(INetwork::Create());
1488  IConnectableLayer* input = net->AddInputLayer(0);
1489 
1490  ActivationDescriptor descriptor;
1491  descriptor.m_Function = ActivationFunction::Square;
1492  IConnectableLayer* activationLayer = net->AddActivationLayer(descriptor);
1493 
1494  IConnectableLayer* output = net->AddOutputLayer(0);
1495 
1496  input->GetOutputSlot(0).Connect(activationLayer->GetInputSlot(0));
1497  activationLayer->GetOutputSlot(0).Connect(output->GetInputSlot(0));
1498  input->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32, 0.0f, 0, true));
1499  activationLayer->GetOutputSlot(0).SetTensorInfo(TensorInfo({ 1, 1, 1, 4 }, DataType::Float32));
1500 
1501  IOptimizedNetworkPtr optNet = Optimize(*net, backends, runtime->GetDeviceSpec());
1502  INFO("Load Network");
1503  // Load it into the runtime. It should pass.
1504  NetworkId netId;
1505  std::string ignoredErrorMessage;
1506  INetworkProperties networkProperties(false, MemorySource::Undefined, MemorySource::Undefined);
1507  CHECK(runtime->LoadNetwork(netId, std::move(optNet),ignoredErrorMessage, networkProperties)
1508  == Status::Success);
1509  INFO("Generate Data");
1510 
1511  // This code looks a little funky, but the idea is to create a buffer of floats offset by the size of a char;
1512  // this guarantees that the resulting buffer is misaligned and thus should always be copied.
1513  auto inputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1514  float* misalignedInputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(inputMemPtr) + 1);
1515 
1516  // Check if our pointer is truly misaligned
1517  uintptr_t alignment = GetDataTypeSize(DataType::Float32);
1518  CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
1519  std::vector<float> inputValues
1520  {
1521  2.0f, 3.0f, 4.0f, 5.0f
1522  };
1523  std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
1524 
1525  auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
1526  float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
1527 
1528  // Check if our pointer is truly misaligned
1529  CHECK (reinterpret_cast<uintptr_t>(misalignedOutputPtr) % alignment);
1530 
1531  std::vector<float> expectedMisalignedOutput
1532  {
1533  4.0f, 9.0f, 16.0f, 25.0f
1534  };
1535 
1536  INFO("Create First Inference");
1537  InputTensors inputTensorsMisaligned
1538  {
1539  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), misalignedInputPtr)},
1540  };
1541  OutputTensors outputTensorsMisaligned
1542  {
1543  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), misalignedOutputPtr)}
1544  };
1545  runtime->GetProfiler(netId)->EnableProfiling(true);
1546  std::vector<ImportedInputId> importedInputIds =
1547  runtime->ImportInputs(netId, inputTensorsMisaligned, MemorySource::Malloc);
1548  // Import should fail.
1549  CHECK(importedInputIds.size() == 0);
1550  std::vector<ImportedOutputId> importedOutputIds =
1551  runtime->ImportOutputs(netId, outputTensorsMisaligned, MemorySource::Malloc);
1552  // Import should fail.
1553  CHECK(importedOutputIds.size() == 0);
1554 
1555  // Do the inference; the import attempts above failed, so the misaligned buffers will be copied instead.
1556  runtime->EnqueueWorkload(netId,
1557  inputTensorsMisaligned,
1558  outputTensorsMisaligned,
1559  importedInputIds,
1560  importedOutputIds);
1561 
1562  // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution details.
1563  ProfilerManager& profilerManager = ProfilerManager::GetInstance();
1564  std::stringstream ss;
1565  profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1566  std::string dump = ss.str();
1567 
1568  // GpuAcc is a different case to CpuRef and CpuAcc: it doesn't use the buffer directly but instead maps it to a
1569  // new set of addresses within GPU memory. This will almost always be auto-aligned, so we don't need to check
1570  // for imports/copies, only that the output is correct.
1571  if (backends[0] != Compute::GpuAcc)
1572  {
1573  // We can only copy so there should be no SyncMemGeneric
1574  int count = SubStringCounter(dump, "SyncMemGeneric");
1575  CHECK(count == 0);
1576  // Should only be CopyMemGeneric workloads as we copied all buffers
1577  count = SubStringCounter(dump, "CopyMemGeneric");
1578  CHECK(count >= 1);
1579  }
1580  // Check the output is correct
1581  unsigned int index = 0;
1582  std::vector<float> alignedOutput(expectedMisalignedOutput.size());
1583  std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
1584  for (auto outputValue : expectedMisalignedOutput)
1585  {
1586  CHECK(outputValue == alignedOutput[index]);
1587  ++index;
1588  }
1589  std::free(inputMemPtr);
1590  std::free(outputMemPtr);
1591 
1592  // Creates structures for input & output
1593  std::vector<float> inputData
1594  {
1595  1.0f, 2.0f, 3.0f, 4.0f
1596  };
1597  std::vector<float> outputData(4);
1598  std::vector<float> expectedOutput
1599  {
1600  1.0f, 4.0f, 9.0f, 16.0f
1601  };
1602 
1603  // Check our input and output pointers are actually aligned
1604  CHECK(!(reinterpret_cast<uintptr_t>(inputData.data()) % alignment));
1605  CHECK(!(reinterpret_cast<uintptr_t>(outputData.data()) % alignment));
1606 
1607  INFO("Create Second Inference");
1608  InputTensors inputTensors
1609  {
1610  {0,armnn::ConstTensor(runtime->GetInputTensorInfo(netId, 0), inputData.data())},
1611  };
1612  OutputTensors outputTensors
1613  {
1614  {0,armnn::Tensor(runtime->GetOutputTensorInfo(netId, 0), outputData.data())}
1615  };
1616 
1617  importedInputIds = runtime->ImportInputs(netId, inputTensors, MemorySource::Malloc);
1618  CHECK(importedInputIds.size() == 1);
1619  importedOutputIds = runtime->ImportOutputs(netId, outputTensors, MemorySource::Malloc);
1620  CHECK(importedOutputIds.size() == 1);
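 // Both import calls returned an id this time, so the inference below can read and write the user buffers directly.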
1621  // Do the inference and force the import as the memory is aligned.
1622  runtime->EnqueueWorkload(netId, InputTensors(), OutputTensors(), importedInputIds, importedOutputIds);
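 // Note: empty InputTensors/OutputTensors containers are passed because the data is supplied through the imported
 // ids created above.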
1623 
1624  // Retrieve the Profiler.AnalyzeEventsAndWriteResults() output to get the workload execution details.
1625  // We need to use AnalyzeEventsAndWriteResults here to make sure the second inference has been profiled.
1626  profilerManager.GetProfiler()->AnalyzeEventsAndWriteResults(ss);
1627  dump = ss.str();
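 // Note: the stringstream still holds the first inference's events, so 'dump' now covers both inferences.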
1628 
1629  if (backends[0] == Compute::CpuAcc)
1630  {
1631  // Reconfigure has not been implemented for CpuAcc, so it will always copy. This check will break whenever
1632  // reconfigure is implemented.
1633  int count = SubStringCounter(dump, "SyncMemGeneric");
1634  CHECK(count == 0);
1635  // There should be CopyMemGeneric workloads from the copied buffers (the check only requires at least one)
1636  count = SubStringCounter(dump, "CopyMemGeneric");
1637  CHECK(count >= 1);
1638  }
1639  else
1640  {
1641  // Repeated inferences make it difficult to check for an accurate count, so we just validate that we now have a
1642  // SyncMemGeneric workload where we previously didn't.
1643  int count = SubStringCounter(dump, "SyncMemGeneric");
1644  CHECK(count >= 1);
1645  // Should still be some CopyMemGeneric Workloads from the last inference
1646  count = SubStringCounter(dump, "CopyMemGeneric");
1647  CHECK(count >= 1);
1648  }
1649  // Check the output is correct
1650  CHECK(std::equal(outputData.begin(), outputData.end(), expectedOutput.begin(), expectedOutput.end()));
1651  // Clean up to avoid interfering with other tests
1652  runtime->UnloadNetwork(netId);
1653 }
1654 
1655 } // anonymous namespace
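The profiling checks in the tests above count workload names with SubStringCounter, which comes from the shared test utilities included at the top of this file rather than being defined here. For orientation only, the sketch below shows equivalent substring-counting behaviour; the function name and signature are illustrative, not the actual helper.

// Illustrative sketch only: counts non-overlapping occurrences of a token in the profiler dump, which is all the
// CHECK assertions above rely on. This is not the real SubStringCounter from the shared test utilities.
#include <cstddef>
#include <string>

inline int CountSubStrings(const std::string& dump, const std::string& token)
{
    int count = 0;
    for (std::size_t pos = dump.find(token); pos != std::string::npos; pos = dump.find(token, pos + token.size()))
    {
        ++count;
    }
    return count;
}

// Example usage mirroring the checks above: CHECK(CountSubStrings(dump, "CopyMemGeneric") >= 1);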