ArmNN 21.11 - ExecuteNetwork.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
7 #include "ExecuteNetworkProgramOptions.hpp"
8 #include <armnn/IAsyncExecutionCallback.hpp>
9 #include <AsyncExecutionCallback.hpp>
10 
11 #include <armnn/Logging.hpp>
12 #include <armnnUtils/Filesystem.hpp>
13 #include <armnnUtils/TContainer.hpp>
14 #include <InferenceTest.hpp>
15 
16 #if defined(ARMNN_SERIALIZER)
17 #include "armnnDeserializer/IDeserializer.hpp"
18 #endif
19 #if defined(ARMNN_TF_LITE_PARSER)
20 #include "armnnTfLiteParser/ITfLiteParser.hpp"
21 #endif
22 #if defined(ARMNN_ONNX_PARSER)
23 #include "armnnOnnxParser/IOnnxParser.hpp"
24 #endif
25 #if defined(ARMNN_TFLITE_DELEGATE)
26 #include <armnn_delegate.hpp>
27 #include <DelegateOptions.hpp>
28 
29 #include <tensorflow/lite/builtin_ops.h>
30 #include <tensorflow/lite/c/builtin_op_data.h>
31 #include <tensorflow/lite/c/common.h>
32 #include <tensorflow/lite/optional_debug_tools.h>
33 #include <tensorflow/lite/kernels/builtin_op_kernels.h>
34 #include <tensorflow/lite/interpreter.h>
35 #include <tensorflow/lite/kernels/register.h>
36 #endif
37 
38 #include <future>
39 
40 /**
41  * Given a measured duration and a threshold time, tell the user whether we succeeded or not.
42  *
43  * @param duration the measured inference duration.
44  * @param thresholdTime the threshold time in milliseconds.
45  * @return false if the measured time exceeded the threshold.
46  */
47 bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
48  const double& thresholdTime)
49 {
50  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
51  << std::fixed << duration.count() << " ms\n";
52  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
53  if (thresholdTime != 0.0)
54  {
55  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
56  << std::fixed << thresholdTime << " ms";
57  auto thresholdMinusInference = thresholdTime - duration.count();
58  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
59  << std::fixed << thresholdMinusInference << " ms" << "\n";
60  if (thresholdMinusInference < 0)
61  {
62  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
63  ARMNN_LOG(fatal) << errorMessage;
64  return false;
65  }
66  }
67  return true;
68 }
69 
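/**
 * Runs the network through the TfLite runtime, optionally with the Arm NN delegate registered,
 * filling the input tensors from the supplied input-tensor-data files, executing
 * params.m_Iterations inferences and printing or writing each output tensor.
 * Returns the status of the last interpreter call, or EXIT_FAILURE on error.
 */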
70 #if defined(ARMNN_TFLITE_DELEGATE)
71 int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions)
72 {
73  using namespace tflite;
74 
75  std::unique_ptr<tflite::FlatBufferModel> model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());
76 
77  auto tfLiteInterpreter = std::make_unique<Interpreter>();
78  tflite::ops::builtin::BuiltinOpResolver resolver;
79 
80  tflite::InterpreterBuilder builder(*model, resolver);
81  builder(&tfLiteInterpreter);
82  tfLiteInterpreter->AllocateTensors();
83 
84  int status = 0;
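 // Register the Arm NN delegate with the interpreter only when the Arm NN delegate executor was
 // selected; otherwise the model runs on the unmodified TfLite interpreter (see the message below).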
85  if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
86  {
87  // Create the Armnn Delegate
88  // Populate a DelegateOptions from the ExecuteNetworkParams.
89  armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions();
90  delegateOptions.SetExternalProfilingParams(runtimeOptions.m_ProfilingOptions);
91 
92  std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
93  theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
94  armnnDelegate::TfLiteArmnnDelegateDelete);
95  // Register armnn_delegate to TfLiteInterpreter
96  status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
97  if (status == kTfLiteError)
98  {
99  ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
100  return EXIT_FAILURE;
101  }
102  }
103  else
104  {
105  std::cout << "Running on TfLite without ArmNN delegate\n";
106  }
107 
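 // Fill each model input from the (optional) input-tensor-data file, converting the text values
 // with the parser that matches the --input-type given for that input.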
108  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData
109  ? armnn::EmptyOptional()
110  : armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths[0]);
111 
112  const size_t numInputs = params.m_InputNames.size();
113 
114  for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
115  {
116  int input = tfLiteInterpreter->inputs()[inputIndex];
117  TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;
118 
119  unsigned int inputSize = 1;
120  if (params.m_InputTensorShapes.size() > 0)
121  {
122  inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements();
123  }
124  else
125  {
126  for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
127  {
128  inputSize *= inputDims->data[dim];
129  }
130  }
131 
132  if (params.m_InputTypes[inputIndex].compare("float") == 0)
133  {
134  auto inputData = tfLiteInterpreter->typed_tensor<float>(input);
135 
136  if(inputData == NULL)
137  {
138  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
139  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
140  return EXIT_FAILURE;
141  }
142 
143  std::vector<float> tensorData;
144  PopulateTensorWithDataGeneric<float>(tensorData,
145  inputSize,
146  dataFile,
147  [](const std::string& s)
148  { return std::stof(s); });
149 
150  std::copy(tensorData.begin(), tensorData.end(), inputData);
151  }
152  else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0)
153  {
154  auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);
155 
156  if(inputData == NULL)
157  {
158  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
159  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
160  return EXIT_FAILURE;
161  }
162 
163  std::vector<int8_t> tensorData;
164  PopulateTensorWithDataGeneric<int8_t>(tensorData,
165  inputSize,
166  dataFile,
167  [](const std::string& s)
168  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });
169 
170  std::copy(tensorData.begin(), tensorData.end(), inputData);
171  }
172  else if (params.m_InputTypes[inputIndex].compare("int") == 0)
173  {
174  auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);
175 
176  if(inputData == NULL)
177  {
178  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
179  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
180  return EXIT_FAILURE;
181  }
182 
183  std::vector<int32_t> tensorData;
184  PopulateTensorWithDataGeneric<int32_t>(tensorData,
185  inputSize,
186  dataFile,
187  [](const std::string& s)
188  { return std::stoi(s); });
189 
190  std::copy(tensorData.begin(), tensorData.end(), inputData);
191  }
192  else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
193  params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
194  {
195  auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);
196 
197  if(inputData == NULL)
198  {
199  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
200  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
201  return EXIT_FAILURE;
202  }
203 
204  std::vector<uint8_t> tensorData;
205  PopulateTensorWithDataGeneric<uint8_t>(tensorData,
206  inputSize,
207  dataFile,
208  [](const std::string& s)
209  { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });
210 
211  std::copy(tensorData.begin(), tensorData.end(), inputData);
212  }
213  else if (params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
214  {
215  auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);
216 
217  if(inputData == NULL)
218  {
219  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
220  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
221  return EXIT_FAILURE;
222  }
223 
224  std::vector<int8_t> tensorData;
225  PopulateTensorWithDataGeneric<int8_t>(tensorData,
226  inputSize,
227  dataFile,
228  [](const std::string& s)
229  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });
230 
231  std::copy(tensorData.begin(), tensorData.end(), inputData);
232  }
233  else
234  {
235  ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". ";
236  return EXIT_FAILURE;
237  }
238  }
239 
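 // Execute the requested number of inferences, timing each Invoke call and then printing each
 // output tensor to stdout or to the requested output file.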
240  for (size_t x = 0; x < params.m_Iterations; x++)
241  {
242  // Start timer to record inference time in milliseconds.
243  const auto start_time = armnn::GetTimeNow();
244  // Run the inference
245  status = tfLiteInterpreter->Invoke();
246  const auto duration = armnn::GetTimeDuration(start_time);
247 
248  // Print out the output
249  for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex)
250  {
251  auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
252  TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;
253  // If we've been asked to write to a file then set a file output stream. Otherwise use stdout.
254  FILE* outputTensorFile = stdout;
255  if (!params.m_OutputTensorFiles.empty())
256  {
257  outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w");
258  if (outputTensorFile == NULL)
259  {
260  ARMNN_LOG(fatal) << "Specified output tensor file, \"" <<
261  params.m_OutputTensorFiles[outputIndex] <<
262  "\", cannot be created. Defaulting to stdout. " <<
263  "Error was: " << std::strerror(errno);
264  outputTensorFile = stdout;
265  }
266  else
267  {
268  ARMNN_LOG(info) << "Writing output " << outputIndex << " of iteration: " << x+1 << " to file: '"
269  << params.m_OutputTensorFiles[outputIndex] << "'";
270  }
271  }
272  long outputSize = 1;
273  for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
274  {
275  outputSize *= outputDims->data[dim];
276  }
277 
278  std::cout << params.m_OutputNames[outputIndex] << ": ";
279  if (params.m_OutputTypes[outputIndex].compare("float") == 0)
280  {
281  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
282  if(tfLiteDelageOutputData == NULL)
283  {
284  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
285  "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
286  return EXIT_FAILURE;
287  }
288 
289  if (!params.m_DontPrintOutputs)
290  {
291  for (int i = 0; i < outputSize; ++i)
292  {
293  fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]);
294  }
295  }
296  }
297  else if (params.m_OutputTypes[outputIndex].compare("int") == 0)
298  {
299  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
300  if(tfLiteDelageOutputData == NULL)
301  {
302  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
303  "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
304  return EXIT_FAILURE;
305  }
306 
307  if (!params.m_DontPrintOutputs)
308  {
309  for (int i = 0; i < outputSize; ++i)
310  {
311  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
312  }
313  }
314  }
315  else if (params.m_OutputTypes[outputIndex].compare("qsymms8") == 0)
316  {
317  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
318  if(tfLiteDelageOutputData == NULL)
319  {
320  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
321  "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
322  return EXIT_FAILURE;
323  }
324 
325  if (!params.m_DontPrintOutputs)
326  {
327  for (int i = 0; i < outputSize; ++i)
328  {
329  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
330  }
331  }
332  }
333  else if (params.m_OutputTypes[outputIndex].compare("qasymm8") == 0 ||
334  params.m_OutputTypes[outputIndex].compare("qasymmu8") == 0)
335  {
336  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
337  if(tfLiteDelageOutputData == NULL)
338  {
339  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
340  "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
341  return EXIT_FAILURE;
342  }
343 
344  if (!params.m_DontPrintOutputs)
345  {
346  for (int i = 0; i < outputSize; ++i)
347  {
348  fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]);
349  }
350  }
351  }
352  else
353  {
354  ARMNN_LOG(fatal) << "Unsupported output tensor data type: "
355  "\"" << params.m_OutputTypes[outputIndex] <<
356  "\". Output type can be specified with the -z argument";
357  return EXIT_FAILURE;
358  }
359  std::cout << std::endl;
360  }
361  CheckInferenceTimeThreshold(duration, params.m_ThresholdTime);
362  }
363 
364  return status;
365 }
366 #endif
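/**
 * Parses the model with TParser, loads it into an InferenceModel and fills the input tensors from
 * the supplied data files (or generated data). The params.m_Iterations inferences are then run
 * synchronously, through the Arm NN thread pool, or via std::launch::async; each output is printed
 * or written to file and the measured time is checked against the optional threshold.
 */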
367 template<typename TParser, typename TDataType>
368 int MainImpl(const ExecuteNetworkParams& params,
369  const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
370 {
371  using namespace std::chrono;
372 
373  std::vector<std::vector<armnnUtils::TContainer>> inputs;
374  std::vector<std::vector<armnnUtils::TContainer>> outputs;
375 
376  try
377  {
378  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
379  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
380  inferenceModelParams.m_ModelPath = params.m_ModelPath;
381  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
382  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
383  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
384  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
385  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
386  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
387  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
388  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
389  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
390  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
391  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
392  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
393  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
394  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
395  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
396  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
397 
398  for(const std::string& inputName: params.m_InputNames)
399  {
400  inferenceModelParams.m_InputBindings.push_back(inputName);
401  }
402 
403  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
404  {
405  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
406  }
407 
408  for(const std::string& outputName: params.m_OutputNames)
409  {
410  inferenceModelParams.m_OutputBindings.push_back(outputName);
411  }
412 
413  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
414  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
415  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
416 
417  InferenceModel<TParser, TDataType> model(inferenceModelParams,
418  params.m_EnableProfiling,
419  params.m_DynamicBackendsPath,
420  runtime);
421 
422  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
423 
424  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
425  armnn::MakeOptional<QuantizationParams>(
426  model.GetInputQuantizationParams()) :
427  armnn::EmptyOptional();
428 
429  if (params.m_InputTensorDataFilePaths.size() > numInputs)
430  {
431  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
432  << "for each input. The user provided "
433  << params.m_InputTensorDataFilePaths.size()
434  << " input-tensor-data file/s which will be used to fill the input/s.\n";
435  }
436 
437  for(unsigned int j = 0; j < params.m_Iterations ; ++j)
438  {
439  std::vector<armnnUtils::TContainer> inputDataContainers;
440  for(unsigned int i = 0; i < numInputs; ++i)
441  {
442  // If there are less input files given than required for the execution of
443  // params.m_Iterations we simply start with the first input file again
444  size_t inputFileIndex = j * numInputs + i;
445  if (!params.m_InputTensorDataFilePaths.empty())
446  {
447  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
448  }
449 
450  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
451  armnn::EmptyOptional() :
452  armnn::MakeOptional<std::string>(
453  params.m_InputTensorDataFilePaths.at(inputFileIndex));
454 
455  unsigned int numElements = model.GetInputSize(i);
456  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
457  {
458  // If the user has provided a tensor shape for the current input,
459  // override numElements
460  numElements = params.m_InputTensorShapes[i]->GetNumElements();
461  }
462 
463  armnnUtils::TContainer tensorData;
464  PopulateTensorWithData(tensorData,
465  numElements,
466  params.m_InputTypes[i],
467  qParams,
468  dataFile);
469 
470  inputDataContainers.push_back(tensorData);
471  }
472  inputs.push_back(inputDataContainers);
473  }
474 
475  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
476 
477  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
478  // result tensors for each iteration. It is possible for the user to specify a type that does not match
479  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
480  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
481  // smaller one. Issue a warning to highlight the potential problem.
482  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
483  {
484  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
485  switch (type)
486  {
487  // --output-type only supports float, int, qasymms8 or qasymmu8.
488  case armnn::DataType::Float32:
489  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
490  {
491  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " <<
492  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
493  ". This may cause unexpected problems or random failures.";
494  }
495  break;
496  case armnn::DataType::QAsymmU8:
497  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
498  {
499  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " <<
500  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
501  ". This may cause unexpected problems or random failures.";
502  }
503  break;
504  case armnn::DataType::Signed32:
505  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
506  {
507  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " <<
508  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
509  ". This may cause unexpected problems or random failures.";
510  }
511  break;
512  case armnn::DataType::QAsymmS8:
513  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
514  {
515  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " <<
516  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
517  ". This may cause unexpected problems or random failures.";
518  }
519  break;
520  default:
521  break;
522  }
523  }
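 // Pre-allocate one result container per output per iteration, choosing the element type from the
 // corresponding --output-type.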
524  for (unsigned int j = 0; j < params.m_Iterations; ++j)
525  {
526  std::vector <armnnUtils::TContainer> outputDataContainers;
527  for (unsigned int i = 0; i < numOutputs; ++i)
528  {
529  if (params.m_OutputTypes[i].compare("float") == 0)
530  {
531  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
532  }
533  else if (params.m_OutputTypes[i].compare("int") == 0)
534  {
535  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
536  }
537  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
538  params.m_OutputTypes[i].compare("qasymmu8") == 0)
539  {
540  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
541  }
542  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
543  {
544  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
545  } else
546  {
547  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
548  return EXIT_FAILURE;
549  }
550  }
551  outputs.push_back(outputDataContainers);
552  }
553 
554  if (params.m_Iterations > 1)
555  {
556  std::stringstream msg;
557  msg << "Network will be executed " << params.m_Iterations;
558  if (params.m_Concurrent)
559  {
560  msg << " times in an asynchronous manner. ";
561  }
562  else
563  {
564  msg << " times successively. ";
565  }
566  msg << "The input-tensor-data files will be reused cyclically if the user didn't provide enough to "
567  "cover each execution.";
568  ARMNN_LOG(info) << msg.str();
569  }
570 
571  // Synchronous execution
572  if (!params.m_Concurrent)
573  {
574  for (size_t x = 0; x < params.m_Iterations; x++)
575  {
576  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
577  auto inference_duration = model.Run(inputs[x], outputs[x]);
578 
579  if (params.m_GenerateTensorData)
580  {
581  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
582  }
583  if (params.m_DontPrintOutputs)
584  {
585  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
586  }
587 
588  // Print output tensors
589  const auto& infosOut = model.GetOutputBindingInfos();
590  for (size_t i = 0; i < numOutputs; i++)
591  {
592  const armnn::TensorInfo& infoOut = infosOut[i].second;
593 
594  // We've made sure before that the number of output files either equals numOutputs, in which
595  // case we overwrite those files when processing the results of each iteration (only the result
596  // of the last iteration will be stored), or there are enough
597  // output files for each output of each iteration.
598  size_t outputFileIndex = x * numOutputs + i;
599  if (!params.m_OutputTensorFiles.empty())
600  {
601  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
602  ARMNN_LOG(info) << "Writing output " << i << " named: '"
603  << inferenceModelParams.m_OutputBindings[i]
604  << "' of iteration: " << x+1 << " to file: '"
605  << params.m_OutputTensorFiles[outputFileIndex] << "'";
606  }
607  auto outputTensorFile = params.m_OutputTensorFiles.empty()
608  ? ""
609  : params.m_OutputTensorFiles[outputFileIndex];
610 
611  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
612  infoOut,
613  outputTensorFile,
614  params.m_DequantizeOutput,
615  !params.m_DontPrintOutputs);
616  mapbox::util::apply_visitor(printer, outputs[x][i]);
617  }
618 
619  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
620  << std::fixed << inference_duration.count() << " ms\n";
621 
622  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
623  if (params.m_ThresholdTime != 0.0)
624  {
625  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
626  << std::fixed << params.m_ThresholdTime << " ms";
627  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
628  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
629  << std::fixed << thresholdMinusInference << " ms" << "\n";
630 
631  if (thresholdMinusInference < 0)
632  {
633  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
634  ARMNN_LOG(fatal) << errorMessage;
635  }
636  }
637  }
638  }
639  // Asynchronous execution using the Arm NN thread pool
640  else if (params.m_ThreadPoolSize >= 1)
641  {
642  try
643  {
644  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
645  armnn::AsyncCallbackManager callbackManager;
646  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
647 
648  // Declare the latest and earliest inference times here to be used when calculating overall time
649  std::chrono::high_resolution_clock::time_point earliestStartTime;
650  std::chrono::high_resolution_clock::time_point latestEndTime =
651  std::chrono::high_resolution_clock::now();
652 
653  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
654  // LoadedNetwork with each scheduled inference having a specific priority
655  for (size_t i = 0; i < params.m_Iterations; ++i)
656  {
657  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
658  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
659  model.RunAsync(inputs[i], outputs[i], cb);
660  }
661 
662  // Check the results
663  unsigned int j = 0;
664  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
665  {
666  auto cb = callbackManager.GetNotifiedCallback();
667 
668  // Get the results
669  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
670  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
671  auto inferenceDuration = endTime - startTime;
672 
673  if (latestEndTime < cb->GetEndTime())
674  {
675  latestEndTime = cb->GetEndTime();
676  }
677 
678  if (earliestStartTime.time_since_epoch().count() == 0)
679  {
680  earliestStartTime = cb->GetStartTime();
681  }
682  else if (earliestStartTime > cb->GetStartTime())
683  {
684  earliestStartTime = cb->GetStartTime();
685  }
686 
687  if (params.m_GenerateTensorData)
688  {
689  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
690  }
691  if (params.m_DontPrintOutputs)
692  {
693  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
694  }
695 
696  // Print output tensors
697  const auto& infosOut = model.GetOutputBindingInfos();
698  for (size_t i = 0; i < numOutputs; i++)
699  {
700  // We've made sure before that the number of output files either equals numOutputs, in which
701  // case we overwrite those files when processing the results of each iteration (only the
702  // result of the last iteration will be stored), or there are enough
703  // output files for each output of each iteration.
704  size_t outputFileIndex = iteration * numOutputs + i;
705  if (!params.m_OutputTensorFiles.empty())
706  {
707  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
708  ARMNN_LOG(info) << "Writing output " << i << " named: '"
709  << inferenceModelParams.m_OutputBindings[i]
710  << "' of iteration: " << iteration+1 << " to file: '"
711  << params.m_OutputTensorFiles[outputFileIndex] << "'";
712  }
713 
714  const armnn::TensorInfo& infoOut = infosOut[i].second;
715  auto outputTensorFile = params.m_OutputTensorFiles.empty()
716  ? ""
717  : params.m_OutputTensorFiles[outputFileIndex];
718 
719  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
720  infoOut,
721  outputTensorFile,
722  params.m_DequantizeOutput,
723  !params.m_DontPrintOutputs);
724  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
725  }
726 
727  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
728  ++j;
729  }
730  //print duration difference between overallStartTime and overallEndTime
731  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
732  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
733  auto totalInferenceDuration = overallEndTime - overallStartTime;
734  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
735  << std::fixed << totalInferenceDuration.count() << " ms\n";
736  }
737  catch (const armnn::Exception& e)
738  {
739  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
740  return EXIT_FAILURE;
741  }
742  }
743  // Asynchronous execution using std::launch::async
744  else
745  {
746  try
747  {
748  ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
749  std::vector<std::future<std::tuple<unsigned int,
750  std::chrono::duration<double, std::milli>>>> inferenceResults;
751  inferenceResults.reserve(params.m_Iterations);
752 
753  // Create WorkingMemHandles for each inference
754  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
755  workingMemHandles.reserve(params.m_Iterations);
756  for (unsigned int i = 0; i < params.m_Iterations; ++i)
757  {
758  workingMemHandles.push_back(model.CreateWorkingMemHandle());
759  }
760 
761  // Run each inference in its own thread
762  // start a timer
763  const auto start_time = armnn::GetTimeNow();
764  for (unsigned int i = 0; i < params.m_Iterations; ++i)
765  {
766  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
767 
768  inferenceResults.push_back(std::async(
769  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
770  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
771  }
772  ));
773  }
774 
775  // Check the results
776  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
777  {
778  // Get the results
779  auto inferenceResult = inferenceResults[j].get();
780  auto inferenceDuration = std::get<1>(inferenceResult);
781  auto inferenceID = std::get<0>(inferenceResult);
782 
783  if (params.m_GenerateTensorData)
784  {
785  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
786  }
787  if (params.m_DontPrintOutputs)
788  {
789  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
790  }
791 
792  // Print output tensors
793  const auto& infosOut = model.GetOutputBindingInfos();
794  for (size_t i = 0; i < numOutputs; i++)
795  {
796  // We've made sure before that the number of output files either equals numOutputs, in which
797  // case we overwrite those files when processing the results of each iteration (only the
798  // result of the last iteration will be stored), or there are enough
799  // output files for each output of each iteration.
800  size_t outputFileIndex = j * numOutputs + i;
801  if (!params.m_OutputTensorFiles.empty())
802  {
803  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
804  ARMNN_LOG(info) << "Writing output " << i << " named: '"
805  << inferenceModelParams.m_OutputBindings[i]
806  << "' of iteration: " << j+1 << " to file: '"
807  << params.m_OutputTensorFiles[outputFileIndex] << "'";
808  }
809  const armnn::TensorInfo& infoOut = infosOut[i].second;
810  auto outputTensorFile = params.m_OutputTensorFiles.empty()
811  ? ""
812  : params.m_OutputTensorFiles[outputFileIndex];
813 
814  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
815  infoOut,
816  outputTensorFile,
817  params.m_DequantizeOutput,
818  !params.m_DontPrintOutputs);
819  mapbox::util::apply_visitor(printer, outputs[j][i]);
820  }
821  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
822  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
823  }
824  // finish timer
825  const auto duration = armnn::GetTimeDuration(start_time);
826  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
827  << std::fixed << duration.count() << " ms\n";
828  }
829  catch (const armnn::Exception& e)
830  {
831  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
832  return EXIT_FAILURE;
833  }
834  }
835  }
836  catch (const armnn::Exception& e)
837  {
838  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
839  return EXIT_FAILURE;
840  }
841 
842  return EXIT_SUCCESS;
843 }
844 
845 // MAIN
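// Configures logging, parses the command-line options, creates the IRuntime and dispatches to the
// parser- or delegate-based implementation that matches the model format.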
846 int main(int argc, const char* argv[])
847 {
848  // Configures logging for both the ARMNN library and this test program.
849  #ifdef NDEBUG
850  armnn::LogSeverity level = armnn::LogSeverity::Info;
851  #else
852  armnn::LogSeverity level = armnn::LogSeverity::Debug;
853  #endif
854  armnn::ConfigureLogging(true, true, level);
855 
856 
857  // Get ExecuteNetwork parameters and runtime options from command line
858  // This might throw an InvalidArgumentException if the user provided invalid inputs
859  ProgramOptions ProgramOptions;
860  try {
861  ProgramOptions.ParseOptions(argc, argv);
862  } catch (const std::exception &e){
863  ARMNN_LOG(fatal) << e.what();
864  return EXIT_FAILURE;
865  }
866 
867  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
868  ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
869  && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
870  {
871  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
872  return EXIT_FAILURE;
873  }
874 
875  // Create runtime
876  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
877 
878  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
879 
880  // Forward to implementation based on the parser type
881  if (modelFormat.find("armnn") != std::string::npos)
882  {
883  #if defined(ARMNN_SERIALIZER)
884  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
885  #else
886  ARMNN_LOG(fatal) << "Not built with serialization support.";
887  return EXIT_FAILURE;
888  #endif
889  }
890  else if (modelFormat.find("onnx") != std::string::npos)
891  {
892  #if defined(ARMNN_ONNX_PARSER)
893  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
894  #else
895  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
896  return EXIT_FAILURE;
897  #endif
898  }
899  else if(modelFormat.find("tflite") != std::string::npos)
900  {
901  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
902  {
903  #if defined(ARMNN_TF_LITE_PARSER)
904  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
905  #else
906  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
907  return EXIT_FAILURE;
908  #endif
909  }
910  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
911  ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
912  ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
913  ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
914  {
915  #if defined(ARMNN_TF_LITE_DELEGATE)
916  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
917  #else
918  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
919  return EXIT_FAILURE;
920  #endif
921  }
922  }
923  else
924  {
925  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
926  << "'. Please include 'armnn', 'tflite' or 'onnx'";
927  return EXIT_FAILURE;
928  }
929 }