ArmNN
 22.02
ExecuteNetwork.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
10 
11 #include <armnn/Logging.hpp>
14 #include <InferenceTest.hpp>
15 
16 #if defined(ARMNN_SERIALIZER)
17 #include "armnnDeserializer/IDeserializer.hpp"
18 #endif
19 #if defined(ARMNN_TF_LITE_PARSER)
20 #include "armnnTfLiteParser/ITfLiteParser.hpp"
21 #endif
22 #if defined(ARMNN_ONNX_PARSER)
23 #include "armnnOnnxParser/IOnnxParser.hpp"
24 #endif
25 #if defined(ARMNN_TFLITE_DELEGATE)
26 #include <armnn_delegate.hpp>
27 #include <DelegateOptions.hpp>
28 
29 #include <tensorflow/lite/builtin_ops.h>
30 #include <tensorflow/lite/c/builtin_op_data.h>
31 #include <tensorflow/lite/c/common.h>
32 #include <tensorflow/lite/optional_debug_tools.h>
33 #include <tensorflow/lite/kernels/builtin_op_kernels.h>
34 #include <tensorflow/lite/interpreter.h>
35 #include <tensorflow/lite/kernels/register.h>
36 #endif
37 
38 #include <future>
39 
40 /**
41  * Given a measured inference duration and a threshold time, tell the user whether we succeeded or not.
42  *
43  * @param duration the measured inference duration.
44  * @param thresholdTime the threshold time in milliseconds.
45  * @return false if the measured time exceeded the threshold.
46  */
47 bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
48  const double& thresholdTime)
49 {
50  ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
51  << std::fixed << duration.count() << " ms\n";
52  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
53  if (thresholdTime != 0.0)
54  {
55  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
56  << std::fixed << thresholdTime << " ms";
57  auto thresholdMinusInference = thresholdTime - duration.count();
58  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
59  << std::fixed << thresholdMinusInference << " ms" << "\n";
60  if (thresholdMinusInference < 0)
61  {
62  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
63  ARMNN_LOG(fatal) << errorMessage;
64  return false;
65  }
66  }
67  return true;
68 }
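// Example: with duration = 12.5 ms and thresholdTime = 10.0 the log reads
// "Threshold time - Inference time: -2.50 ms" and the function returns false;
// with the default thresholdTime of 0.0 the check is skipped and it returns true.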
69 
70 #if defined(ARMNN_TFLITE_DELEGATE)
71 int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions)
72 {
73  // Build model and corresponding interpreter
74  using namespace tflite;
75 
76  std::unique_ptr<tflite::FlatBufferModel> model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());
77 
78  auto tfLiteInterpreter = std::make_unique<Interpreter>();
79  tflite::ops::builtin::BuiltinOpResolver resolver;
80 
81  tflite::InterpreterBuilder builder(*model, resolver);
82  builder(&tfLiteInterpreter);
83  tfLiteInterpreter->AllocateTensors();
84 
85  int status = 0;
86 
87  // Create & populate Armnn Delegate, then register it to TfLiteInterpreter
88  if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
89  {
90  // Create the Armnn Delegate
91  // Populate a DelegateOptions from the ExecuteNetworkParams.
92  armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions();
93  delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions);
94 
95  std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
96  theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
97  armnnDelegate::TfLiteArmnnDelegateDelete);
98  // Register armnn_delegate to TfLiteInterpreter
99  status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
100  if (status != kTfLiteOk)
101  {
102  ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
103  return EXIT_FAILURE;
104  }
105  }
106  else
107  {
108  std::cout << "Running on TfLite without ArmNN delegate\n";
109  }
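// Note: ModifyGraphWithDelegate() only hands over the operators the Arm NN delegate
// reports as supported; anything left over keeps running on the built-in TfLite
// kernels registered through the BuiltinOpResolver above.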
110 
111  // Load (or generate) input data for inference
112  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData
113  ? armnn::EmptyOptional()
114  : armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths[0]);
115 
116  const size_t numInputs = params.m_InputNames.size();
117 
118  // Populate input tensor of interpreter
119  for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
120  {
121  int input = tfLiteInterpreter->inputs()[inputIndex];
122  TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;
123 
124  unsigned int inputSize = 1;
125  if (params.m_InputTensorShapes.size() > 0)
126  {
127  inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements();
128  }
129  else
130  {
131  for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
132  {
133  inputSize *= inputDims->data[dim];
134  }
135  }
136 
137  if (params.m_InputTypes[inputIndex].compare("float") == 0)
138  {
139  auto inputData = tfLiteInterpreter->typed_tensor<float>(input);
140 
141  if(inputData == NULL)
142  {
143  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
144  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
145  return EXIT_FAILURE;
146  }
147 
148  std::vector<float> tensorData;
149  PopulateTensorWithDataGeneric<float>(tensorData,
150  inputSize,
151  dataFile,
152  [](const std::string& s)
153  { return std::stof(s); });
154 
155  std::copy(tensorData.begin(), tensorData.end(), inputData);
156  }
157  else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 ||
158  params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
159  {
160  auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);
161 
162  if(inputData == NULL)
163  {
164  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
165  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
166  return EXIT_FAILURE;
167  }
168 
169  std::vector<int8_t> tensorData;
170  PopulateTensorWithDataGeneric<int8_t>(tensorData,
171  inputSize,
172  dataFile,
173  [](const std::string& s)
174  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });
175 
176  std::copy(tensorData.begin(), tensorData.end(), inputData);
177  }
178  else if (params.m_InputTypes[inputIndex].compare("int") == 0)
179  {
180  auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);
181 
182  if(inputData == NULL)
183  {
184  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
185  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
186  return EXIT_FAILURE;
187  }
188 
189  std::vector<int32_t> tensorData;
190  PopulateTensorWithDataGeneric<int32_t>(tensorData,
191  inputSize,
192  dataFile,
193  [](const std::string& s)
194  { return std::stoi(s); });
195 
196  std::copy(tensorData.begin(), tensorData.end(), inputData);
197  }
198  else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
199  params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
200  {
201  auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);
202 
203  if(inputData == NULL)
204  {
205  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
206  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
207  return EXIT_FAILURE;
208  }
209 
210  std::vector<uint8_t> tensorData;
211  PopulateTensorWithDataGeneric<uint8_t>(tensorData,
212  inputSize,
213  dataFile,
214  [](const std::string& s)
215  { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });
216 
217  std::copy(tensorData.begin(), tensorData.end(), inputData);
218  }
219  else
220  {
221  ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". ";
222  return EXIT_FAILURE;
223  }
224  }
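// The --input-type strings handled above map as follows: "float" to float elements,
// "int" to int32_t, "qasymm8"/"qasymmu8" to uint8_t and "qsymms8"/"qasymms8" to int8_t;
// any other value aborts with EXIT_FAILURE.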
225 
226  // Run inference, print the output of the inference
227  for (size_t x = 0; x < params.m_Iterations; x++)
228  {
229  // Start timer to record inference time in milliseconds.
230  const auto start_time = armnn::GetTimeNow();
231  // Run the inference
232  status = tfLiteInterpreter->Invoke();
233  const auto duration = armnn::GetTimeDuration(start_time);
234 
235  // The TFLite interpreter's outputs might be in a different order than the user-specified output names.
236  std::map<unsigned int, int> paramToTfliteOutputIndex;
237  for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex)
238  {
239  paramToTfliteOutputIndex[paramIndex] = -1;
240  for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex)
241  {
242  if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex))
243  {
244  paramToTfliteOutputIndex[paramIndex] = tfLiteIndex;
245  }
246  }
247  }
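// The map built above guards against that reordering: every user-supplied output name
// is matched against GetOutputName(), and names that never match keep the sentinel
// value -1 and are reported and skipped in the printing loop below.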
248 
249  // Print out the output
250  for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex)
251  {
252  int outputIndex = paramToTfliteOutputIndex[paramOutputIndex];
253  if (outputIndex == -1)
254  {
255  std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) <<
256  std::endl;
257  continue;
258  }
259  auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
260  TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;
261  // If we've been asked to write to a file then set a file output stream. Otherwise use stdout.
262  FILE* outputTensorFile = stdout;
263  if (!params.m_OutputTensorFiles.empty())
264  {
265  outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w");
266  if (outputTensorFile == NULL)
267  {
268  ARMNN_LOG(fatal) << "Specified output tensor file, \"" <<
269  params.m_OutputTensorFiles[outputIndex] <<
270  "\", cannot be created. Defaulting to stdout. " <<
271  "Error was: " << std::strerror(errno);
272  outputTensorFile = stdout;
273  }
274  else
275  {
276  ARMNN_LOG(info) << "Writing output '" << outputIndex << "' of iteration: " << x+1 << " to file: '"
277  << params.m_OutputTensorFiles[outputIndex] << "'";
278  }
279  }
280  long outputSize = 1;
281  for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
282  {
283  outputSize *= outputDims->data[dim];
284  }
285 
286  std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": ";
287  if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0)
288  {
289  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
290  if(tfLiteDelageOutputData == NULL)
291  {
292  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
293  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
294  return EXIT_FAILURE;
295  }
296 
297  if (!params.m_DontPrintOutputs)
298  {
299  for (int i = 0; i < outputSize; ++i)
300  {
301  fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]);
302  }
303  }
304  }
305  else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0)
306  {
307  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
308  if(tfLiteDelageOutputData == NULL)
309  {
310  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
311  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
312  return EXIT_FAILURE;
313  }
314 
315  if (!params.m_DontPrintOutputs)
316  {
317  for (int i = 0; i < outputSize; ++i)
318  {
319  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
320  }
321  }
322  }
323  else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 ||
324  params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0)
325  {
326  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
327  if(tfLiteDelageOutputData == NULL)
328  {
329  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
330  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
331  return EXIT_FAILURE;
332  }
333 
334  if (!params.m_DontPrintOutputs)
335  {
336  for (int i = 0; i < outputSize; ++i)
337  {
338  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
339  }
340  }
341  }
342  else if (params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 ||
343  params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0)
344  {
345  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
346  if(tfLiteDelageOutputData == NULL)
347  {
348  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
349  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
350  return EXIT_FAILURE;
351  }
352 
353  if (!params.m_DontPrintOutputs)
354  {
355  for (int i = 0; i < outputSize; ++i)
356  {
357  fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]);
358  }
359  }
360  }
361  else
362  {
363  ARMNN_LOG(fatal) << "Unsupported output tensor data type "
364  "\"" << params.m_OutputTypes[paramOutputIndex] <<
365  "\". The output type can be specified with the -z argument.";
366  return EXIT_FAILURE;
367  }
368  std::cout << std::endl;
369  }
370  CheckInferenceTimeThreshold(duration, params.m_ThresholdTime);
371  }
372 
373  return status;
374 }
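// A typical run of this path selects the tflite model format, points --model-path at a
// .tflite file, chooses backends with --compute, and gives per-tensor types via
// --input-type/--output-type (the -z argument mentioned above); the option spellings
// here are indicative and ExecuteNetworkProgramOptions.cpp remains the authoritative list.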
375 #endif
376 template<typename TParser, typename TDataType>
377 int MainImpl(const ExecuteNetworkParams& params,
378  const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
379 {
380  using namespace std::chrono;
381 
382  std::vector<std::vector<armnnUtils::TContainer>> inputs;
383  std::vector<std::vector<armnnUtils::TContainer>> outputs;
384 
385  try
386  {
387  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
388  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
389  inferenceModelParams.m_ModelPath = params.m_ModelPath;
390  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
391  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
392  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
393  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
394  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
395  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
396  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
397  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
398  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
399  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
400  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
401  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
402  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
403  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
404  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
405  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
406 
407  for(const std::string& inputName: params.m_InputNames)
408  {
409  inferenceModelParams.m_InputBindings.push_back(inputName);
410  }
411 
412  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
413  {
414  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
415  }
416 
417  for(const std::string& outputName: params.m_OutputNames)
418  {
419  inferenceModelParams.m_OutputBindings.push_back(outputName);
420  }
421 
422  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
423  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
424  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
425 
426  InferenceModel<TParser, TDataType> model(inferenceModelParams,
427  params.m_EnableProfiling,
428  params.m_DynamicBackendsPath,
429  runtime);
430 
431  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
432 
433  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
434  armnn::MakeOptional<QuantizationParams>(
435  model.GetInputQuantizationParams()) :
436  armnn::EmptyOptional();
437 
438  if (params.m_InputTensorDataFilePaths.size() > numInputs)
439  {
440  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
441  << "for each input. The user provided "
442  << params.m_InputTensorDataFilePaths.size()
443  << " input-tensor-data file/s which will be used to fill the input/s.\n";
444  }
445 
446  for(unsigned int j = 0; j < params.m_Iterations ; ++j)
447  {
448  std::vector<armnnUtils::TContainer> inputDataContainers;
449  for(unsigned int i = 0; i < numInputs; ++i)
450  {
451  // If there are fewer input files given than required for the execution of
452  // params.m_Iterations we simply start with the first input file again
453  size_t inputFileIndex = j * numInputs + i;
454  if (!params.m_InputTensorDataFilePaths.empty())
455  {
456  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
457  }
458 
459  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
460  armnn::EmptyOptional() :
461  armnn::MakeOptional<std::string>(
462  params.m_InputTensorDataFilePaths.at(inputFileIndex));
463 
464  unsigned int numElements = model.GetInputSize(i);
465  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
466  {
467  // If the user has provided a tensor shape for the current input,
468  // override numElements
469  numElements = params.m_InputTensorShapes[i]->GetNumElements();
470  }
471 
472  armnnUtils::TContainer tensorData;
473  PopulateTensorWithData(tensorData,
474  numElements,
475  params.m_InputTypes[i],
476  qParams,
477  dataFile);
478 
479  inputDataContainers.push_back(tensorData);
480  }
481  inputs.push_back(inputDataContainers);
482  }
483 
484  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
485 
486  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
487  // result tensors for each iteration. It is possible for the user to specify a type that does not match
488  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
489  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
490  // smaller one. Issue a warning to highlight the potential problem.
491  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
492  {
493  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
494  switch (type)
495  {
496  // --output-type only supports float, int, qasymms8 or qasymmu8.
497  case armnn::DataType::Float32:
498  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
499  {
500  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " <<
501  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
502  ". This may cause unexpected problems or random failures.";
503  }
504  break;
505  case armnn::DataType::QAsymmU8:
506  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
507  {
508  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " <<
509  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
510  ". This may cause unexpected problems or random failures.";
511  }
512  break;
513  case armnn::DataType::Signed32:
514  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
515  {
516  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " <<
517  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
518  ". This may cause unexpected problems or random failures.";
519  }
520  break;
521  case armnn::DataType::QAsymmS8:
522  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
523  {
524  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " <<
525  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
526  ". This may cause unexpected problems or random failures.";
527  }
528  break;
529  default:
530  break;
531  }
532  }
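// Summary of the pairings checked above: Float32 expects --output-type "float",
// QAsymmU8 expects "qasymmu8", Signed32 expects "int" and QAsymmS8 expects "qasymms8";
// a mismatch only produces a warning and execution still proceeds.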
533  for (unsigned int j = 0; j < params.m_Iterations; ++j)
534  {
535  std::vector <armnnUtils::TContainer> outputDataContainers;
536  for (unsigned int i = 0; i < numOutputs; ++i)
537  {
538  if (params.m_OutputTypes[i].compare("float") == 0)
539  {
540  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
541  }
542  else if (params.m_OutputTypes[i].compare("int") == 0)
543  {
544  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
545  }
546  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
547  params.m_OutputTypes[i].compare("qasymmu8") == 0)
548  {
549  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
550  }
551  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
552  {
553  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
554  } else
555  {
556  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
557  return EXIT_FAILURE;
558  }
559  }
560  outputs.push_back(outputDataContainers);
561  }
562 
563  if (params.m_Iterations > 1)
564  {
565  std::stringstream msg;
566  msg << "Network will be executed " << params.m_Iterations;
567  if (params.m_Concurrent)
568  {
569  msg << " times in an asynchronous manner. ";
570  }
571  else
572  {
573  msg << " times successively. ";
574  }
575  msg << "The input-tensor-data files will be reused cyclically if the user didn't provide enough to "
576  "cover each execution.";
577  ARMNN_LOG(info) << msg.str();
578  }
579 
580  // Synchronous execution
581  if (!params.m_Concurrent)
582  {
583  for (size_t x = 0; x < params.m_Iterations; x++)
584  {
585  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
586  auto inference_duration = model.Run(inputs[x], outputs[x]);
587 
588  if (params.m_GenerateTensorData)
589  {
590  ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
591  }
592  if (params.m_DontPrintOutputs)
593  {
594  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
595  }
596 
597  // Print output tensors
598  const auto& infosOut = model.GetOutputBindingInfos();
599  for (size_t i = 0; i < numOutputs; i++)
600  {
601  const armnn::TensorInfo& infoOut = infosOut[i].second;
602 
603  // We've made sure before that the number of output files either equals numOutputs, in which
604  // case we override those files when processing the results of each iteration (only the result
605  // of the last iteration will be stored), or there are enough
606  // output files for each output of each iteration.
607  size_t outputFileIndex = x * numOutputs + i;
608  if (!params.m_OutputTensorFiles.empty())
609  {
610  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
611  ARMNN_LOG(info) << "Writing output " << i << " named: '"
612  << inferenceModelParams.m_OutputBindings[i]
613  << "' of iteration: " << x+1 << " to file: '"
614  << params.m_OutputTensorFiles[outputFileIndex] << "'";
615  }
616  auto outputTensorFile = params.m_OutputTensorFiles.empty()
617  ? ""
618  : params.m_OutputTensorFiles[outputFileIndex];
619 
620  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
621  infoOut,
622  outputTensorFile,
623  params.m_DequantizeOutput,
624  !params.m_DontPrintOutputs);
625  mapbox::util::apply_visitor(printer, outputs[x][i]);
626  }
627 
628  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
629  << std::fixed << inference_duration.count() << " ms\n";
630 
631  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
632  if (params.m_ThresholdTime != 0.0)
633  {
634  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
635  << std::fixed << params.m_ThresholdTime << " ms";
636  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
637  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
638  << std::fixed << thresholdMinusInference << " ms" << "\n";
639 
640  if (thresholdMinusInference < 0)
641  {
642  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
643  ARMNN_LOG(fatal) << errorMessage;
644  }
645  }
646  }
647  }
648  // Asynchronous execution using the Arm NN thread pool
649  else if (params.m_ThreadPoolSize >= 1)
650  {
651  try
652  {
653  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
654  armnn::AsyncCallbackManager callbackManager;
655  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
656 
657  // Declare the latest and earliest inference times here to be used when calculating overall time
658  std::chrono::high_resolution_clock::time_point earliestStartTime;
659  std::chrono::high_resolution_clock::time_point latestEndTime =
660  std::chrono::high_resolution_clock::now();
661 
662  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
663  // LoadedNetwork with each scheduled inference having a specific priority
664  for (size_t i = 0; i < params.m_Iterations; ++i)
665  {
666  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
667  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
668  model.RunAsync(inputs[i], outputs[i], cb);
669  }
670 
671  // Check the results
672  unsigned int j = 0;
673  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
674  {
675  auto cb = callbackManager.GetNotifiedCallback();
676 
677  // Get the results
678  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
679  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
680  auto inferenceDuration = endTime - startTime;
681 
682  if (latestEndTime < cb->GetEndTime())
683  {
684  latestEndTime = cb->GetEndTime();
685  }
686 
687  if (earliestStartTime.time_since_epoch().count() == 0)
688  {
689  earliestStartTime = cb->GetStartTime();
690  }
691  else if (earliestStartTime > cb->GetStartTime())
692  {
693  earliestStartTime = cb->GetStartTime();
694  }
695 
696  if (params.m_GenerateTensorData)
697  {
698  ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
699  }
700  if (params.m_DontPrintOutputs)
701  {
702  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
703  }
704 
705  // Print output tensors
706  const auto& infosOut = model.GetOutputBindingInfos();
707  for (size_t i = 0; i < numOutputs; i++)
708  {
709  // We've made sure before that the number of output files either equals numOutputs, in which
710  // case we override those files when processing the results of each iteration (only the
711  // result of the last iteration will be stored), or there are enough
712  // output files for each output of each iteration.
713  size_t outputFileIndex = iteration * numOutputs + i;
714  if (!params.m_OutputTensorFiles.empty())
715  {
716  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
717  ARMNN_LOG(info) << "Writing output " << i << " named: '"
718  << inferenceModelParams.m_OutputBindings[i]
719  << "' of iteration: " << iteration+1 << " to file: '"
720  << params.m_OutputTensorFiles[outputFileIndex] << "'";
721  }
722 
723  const armnn::TensorInfo& infoOut = infosOut[i].second;
724  auto outputTensorFile = params.m_OutputTensorFiles.empty()
725  ? ""
726  : params.m_OutputTensorFiles[outputFileIndex];
727 
728  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
729  infoOut,
730  outputTensorFile,
731  params.m_DequantizeOutput,
732  !params.m_DontPrintOutputs);
733  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
734  }
735 
736  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
737  ++j;
738  }
739  // Print the duration difference between overallStartTime and overallEndTime
740  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
741  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
742  auto totalInferenceDuration = overallEndTime - overallStartTime;
743  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
744  << std::fixed << totalInferenceDuration.count() << " ms\n";
745  }
746  catch (const armnn::Exception& e)
747  {
748  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
749  return EXIT_FAILURE;
750  }
751  }
752  // Asynchronous execution using std::launch::async
753  else
754  {
755  try
756  {
757  ARMNN_LOG(info) << "Asynchronous execution with std::launch::async... \n";
758  std::vector<std::future<std::tuple<unsigned int,
759  std::chrono::duration<double, std::milli>>>> inferenceResults;
760  inferenceResults.reserve(params.m_Iterations);
761 
762  // Create WorkingMemHandles for each inference
763  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
764  workingMemHandles.reserve(params.m_Iterations);
765  for (unsigned int i = 0; i < params.m_Iterations; ++i)
766  {
767  workingMemHandles.push_back(model.CreateWorkingMemHandle());
768  }
769 
770  // Run each inference in its own thread
771  // start a timer
772  const auto start_time = armnn::GetTimeNow();
773  for (unsigned int i = 0; i < params.m_Iterations; ++i)
774  {
775  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
776 
777  inferenceResults.push_back(std::async(
778  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
779  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
780  }
781  ));
782  }
783 
784  // Check the results
785  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
786  {
787  // Get the results
788  auto inferenceResult = inferenceResults[j].get();
789  auto inferenceDuration = std::get<1>(inferenceResult);
790  auto inferenceID = std::get<0>(inferenceResult);
791 
792  if (params.m_GenerateTensorData)
793  {
794  ARMNN_LOG(warning) << "The input data was generated; note that the output will not be useful";
795  }
796  if (params.m_DontPrintOutputs)
797  {
798  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
799  }
800 
801  // Print output tensors
802  const auto& infosOut = model.GetOutputBindingInfos();
803  for (size_t i = 0; i < numOutputs; i++)
804  {
805  // We've made sure before that the number of output files either equals numOutputs, in which
806  // case we override those files when processing the results of each iteration (only the
807  // result of the last iteration will be stored), or there are enough
808  // output files for each output of each iteration.
809  size_t outputFileIndex = j * numOutputs + i;
810  if (!params.m_OutputTensorFiles.empty())
811  {
812  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
813  ARMNN_LOG(info) << "Writing output " << i << " named: '"
814  << inferenceModelParams.m_OutputBindings[i]
815  << "' of iteration: " << j+1 << " to file: '"
816  << params.m_OutputTensorFiles[outputFileIndex] << "'";
817  }
818  const armnn::TensorInfo& infoOut = infosOut[i].second;
819  auto outputTensorFile = params.m_OutputTensorFiles.empty()
820  ? ""
821  : params.m_OutputTensorFiles[outputFileIndex];
822 
823  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
824  infoOut,
825  outputTensorFile,
826  params.m_DequantizeOutput,
827  !params.m_DontPrintOutputs);
828  mapbox::util::apply_visitor(printer, outputs[j][i]);
829  }
830  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
831  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
832  }
833  // finish timer
834  const auto duration = armnn::GetTimeDuration(start_time);
835  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
836  << std::fixed << duration.count() << " ms\n";
837  }
838  catch (const armnn::Exception& e)
839  {
840  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
841  return EXIT_FAILURE;
842  }
843  }
844  }
845  catch (const armnn::Exception& e)
846  {
847  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
848  return EXIT_FAILURE;
849  }
850 
851  return EXIT_SUCCESS;
852 }
853 
854 // MAIN
855 int main(int argc, const char* argv[])
856 {
857  // Configures logging for both the ARMNN library and this test program.
858  #ifdef NDEBUG
859  armnn::LogSeverity level = armnn::LogSeverity::Info;
860  #else
861  armnn::LogSeverity level = armnn::LogSeverity::Debug;
862  #endif
863  armnn::ConfigureLogging(true, true, level);
864 
865 
866  // Get ExecuteNetwork parameters and runtime options from command line
867  // This might throw an InvalidArgumentException if the user provided invalid inputs
868  ProgramOptions ProgramOptions;
869  try {
870  ProgramOptions.ParseOptions(argc, argv);
871  } catch (const std::exception &e){
872  ARMNN_LOG(fatal) << e.what();
873  return EXIT_FAILURE;
874  }
875 
876  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
877  ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
878  && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
879  {
880  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
881  return EXIT_FAILURE;
882  }
883 
884  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
885 
886  // Forward to implementation based on the parser type
887  if (modelFormat.find("armnn") != std::string::npos)
888  {
889  #if defined(ARMNN_SERIALIZER)
890  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
891  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
892  #else
893  ARMNN_LOG(fatal) << "Not built with serialization support.";
894  return EXIT_FAILURE;
895  #endif
896  }
897  else if (modelFormat.find("onnx") != std::string::npos)
898  {
899  #if defined(ARMNN_ONNX_PARSER)
900  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
901  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
902  #else
903  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
904  return EXIT_FAILURE;
905  #endif
906  }
907  else if(modelFormat.find("tflite") != std::string::npos)
908  {
909  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
910  {
911  #if defined(ARMNN_TF_LITE_PARSER)
912  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
913  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
914  #else
915  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
916  return EXIT_FAILURE;
917  #endif
918  }
919  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
920  ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
921  ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
922  ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
923  {
924  #if defined(ARMNN_TF_LITE_DELEGATE)
925  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
926  #else
927  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
928  return EXIT_FAILURE;
929  #endif
930  }
931  }
932  else
933  {
934  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
935  << "'. Please include 'armnn', 'tflite' or 'onnx'";
936  return EXIT_FAILURE;
937  }
938 }
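// Dispatch summary for main(): "armnn" model files go through
// MainImpl<armnnDeserializer::IDeserializer, float>, "onnx" through
// MainImpl<armnnOnnxParser::IOnnxParser, float>, and "tflite" either through
// MainImpl<armnnTfLiteParser::ITfLiteParser, float> or through TfLiteDelegateMainImpl,
// depending on the requested TfLite executor; each route needs the matching
// ARMNN_SERIALIZER / ARMNN_ONNX_PARSER / ARMNN_TF_LITE_PARSER / ARMNN_TFLITE_DELEGATE
// build definition.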