ArmNN 22.08
ArmNNExecutor.cpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//


#include "ArmNNExecutor.hpp"


using namespace armnn;
using namespace std::chrono;

ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
: m_Params(params)
{
    runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
    runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
    m_Runtime = armnn::IRuntime::Create(runtimeOptions);

    auto parser = CreateParser();
    auto network = parser->CreateNetwork(m_Params);
    auto optNet = OptimizeNetwork(network.get());

    m_IOInfo = GetIOInfo(optNet.get());
    SetupInputsAndOutputs();

    std::string errorMsg;

    armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
    if (params.m_OutputDetailsOnlyToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
    }
    else if (params.m_OutputDetailsToStdOut)
    {
        profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
    }

    INetworkProperties networkProperties{m_Params.m_Concurrent,
                                         MemorySource::Undefined,
                                         MemorySource::Undefined,
                                         params.m_EnableProfiling,
                                         profilingDetailsMethod};

    m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);

    if (m_Params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << m_Params.m_Iterations;
        if (m_Params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }

    if (m_Params.m_GenerateTensorData)
    {
        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
    }

    if (m_Params.m_DontPrintOutputs)
    {
        ARMNN_LOG(info) << "Printing outputs to console is disabled.";
    }
}

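// ExecuteAsync() runs the loaded network through an Arm NN thread pool: it creates one
// working-memory handle per thread, schedules m_Params.m_Iterations inferences, then waits
// for the notified callbacks to check timings and (optionally) print the output tensors.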
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
                                                     m_Runtime.get(),
                                                     memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    // Declare the latest and earliest inference times here to be used when calculating overall time
    std::chrono::high_resolution_clock::time_point earliestStartTime =
            std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
            std::chrono::high_resolution_clock::now();

    // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
    // LoadedNetwork with each scheduled inference having a specific priority
    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }

    // Check the results
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        // Get the results
        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }

    // Print duration difference between overallStartTime and overallEndTime
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}

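// ExecuteSync() runs the loaded network iteration by iteration on the calling thread via
// IRuntime::EnqueueWorkload. When m_ImportInputsIfAligned is set, the pre-imported input and
// output IDs are passed along so aligned buffers can be used without copying.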
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        const auto inferenceDuration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }

        // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
        CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
    }
}

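// Execute() dispatches on m_Params.m_ThreadPoolSize: 0 means synchronous execution, anything
// else takes the asynchronous thread-pool path. The returned vector holds raw pointers into
// m_OutputStorage, one entry per output buffer.
//
// Typical call sequence (illustrative sketch only; assumes 'params' and 'runtimeOptions' were
// populated elsewhere, e.g. by the ExecuteNetwork command-line handling):
//
//     ArmNNExecutor executor(params, runtimeOptions);
//     executor.PrintNetworkInfo();
//     std::vector<const void*> results = executor.Execute();
//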
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }
    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }

    return results;
}

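// PrintNetworkInfo() writes a summary of the network's inputs and outputs (name, shape,
// data type and, for quantized tensors, offset and scale(s)) to stdout.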
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale: outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}

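// SetupInputsAndOutputs() validates the user-supplied input/output names and file paths,
// allocates backing storage for every input and output tensor, populates the inputs (from
// file or with generated data), optionally pre-imports aligned buffers, and replicates the
// tensor sets so that every iteration has a set to run on.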
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;

        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    if (m_Params.m_ThreadPoolSize != 0)
    {
        // The current implementation of the Threadpool does not allow binding of outputs to a thread.
        // So to ensure no two threads write to the same output at the same time, no output can be reused.
        noOutputSets = m_Params.m_Iterations;
    }

    if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
    {
        ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided "
                        << m_Params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }

    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName: inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
                                             tensorInfo.GetQuantizationScale(),
                                             tensorInfo.GetQuantizationOffset(),
                                             true};

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                                                          armnn::EmptyOptional() :
                                                          armnn::MakeOptional<std::string>(
                                                              m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(
                    m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
            }
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }

    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output: m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }

    // Fill the remaining iterations with copies
    const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
    for (unsigned int i = 1; i <= remainingInputSets; i++)
    {
        m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
        }
    }

    const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
    for (unsigned int i = 1; i <= remainingOutputSets; i++)
    {
        m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
        }
    }
}

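// GetIOInfo() walks the optimized network with a small IStrategy visitor and records the
// name, binding id and TensorInfo of every Input and Output layer it encounters.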
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                {
                    m_IOInfo.m_InputNames.emplace_back(name);
                    m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
                    break;
                }
                case armnn::LayerType::Output:
                {
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
                    break;
                }
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);

    return ioStrategy.m_IOInfo;
}

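// OptimizeNetwork() builds the OptimizerOptions from the command-line parameters (FP16/BF16
// reduction, debug output, shape inference, profiling), attaches GpuAcc/CpuAcc backend options,
// runs armnn::Optimize and optionally serializes the optimized graph to a .dot file.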
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};

    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
    options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
    options.m_Debug = m_Params.m_PrintIntermediate;
    options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
                                     armnn::ShapeInferenceMethod::InferAndValidate :
                                     armnn::ShapeInferenceMethod::ValidateOnly;
    options.m_ProfilingEnabled = m_Params.m_EnableProfiling;

    armnn::BackendOptions gpuAcc("GpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
                                     { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
                                     { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
                                 });

    armnn::BackendOptions cpuAcc("CpuAcc",
                                 {
                                     { "FastMathEnabled", m_Params.m_EnableFastMath },
                                     { "NumberOfThreads", m_Params.m_NumberOfThreads }
                                 });
    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);

    const auto optimization_start_time = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";

    if (!optNet)
    {
        LogAndThrow("Optimize returned nullptr");
    }

    // If -v, --visualize-optimized-model is enabled then construct a file name for the dot file.
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}

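// CreateParser() picks a parser implementation from the model file name: "armnn" selects the
// ArmNN deserializer, "tflite" the TensorFlow Lite parser and "onnx" the ONNX parser, each
// guarded by the corresponding build-time define.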
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    // If no model format is given check the file name
    const std::string& modelFormat = m_Params.m_ModelPath;

    m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos ? true : false;
    std::unique_ptr<IParser> parser = nullptr;
    // Forward to implementation based on the parser type
    if (modelFormat.find("armnn") != std::string::npos)
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelFormat.find("tflite") != std::string::npos)
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}

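// PrintOutputTensors() prints each output tensor of one iteration to stdout and, when output
// files were supplied on the command line, writes the tensor to the file that corresponds to
// this iteration and output index.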
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output: *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);
        // We've made sure before that the number of output files either equals numOutputs, in which
        // case we overwrite those files when processing the results of each iteration (only the result
        // of the last iteration will be stored), or there are enough output files for each output of
        // each iteration.
        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
                            << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
                                                              armnn::EmptyOptional() :
                                                              armnn::MakeOptional<std::string>(
                                                                  m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }

            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QSymmS8:
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        std::cout << "\n";
        // Advance to the next output so the file index tracks the output position within this iteration
        ++outputIndex;
    }
}

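// CompareAndPrintResult() computes the root mean squared error between each of this executor's
// output tensors and the corresponding buffer produced by another IExecutor, and prints it.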
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors: m_OutputTensorsVec)
    {
        for (const auto& outputTensor: outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS8:
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
            std::cout << "RMSE: of " << result << "\n";
        }
    }
}

#if defined(ARMNN_SERIALIZER)
ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}

armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
}
#endif

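// The remaining parser adapters wrap the TensorFlow Lite and ONNX parsers behind the common
// IParser interface used by CreateParser(); each is only compiled when the matching parser
// is enabled in the build.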
#if defined(ARMNN_TF_LITE_PARSER)
ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
{
    armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
    options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
    options.m_InferAndValidate = params.m_InferOutputShape;

    m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
}

armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif

#if defined(ARMNN_ONNX_PARSER)
ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}

armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
               m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
               m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input differently by calling the corresponding parser function
    return params.m_IsModelBinary ?
           m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
           m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif