ArmNN 22.05
ExecuteNetwork.cpp File Reference

Go to the source code of this file.

Functions

bool CheckInferenceTimeThreshold (const std::chrono::duration< double, std::milli > &duration, const double &thresholdTime)
 Given a measured duration and a threshold time, tell the user whether we succeeded or not. More...
 
template<typename TParser , typename TDataType >
int MainImpl (const ExecuteNetworkParams &params, const std::shared_ptr< armnn::IRuntime > &runtime=nullptr)
 
int main (int argc, const char *argv[])
 

Function Documentation

◆ CheckInferenceTimeThreshold()

bool CheckInferenceTimeThreshold ( const std::chrono::duration< double, std::milli > &  duration,
const double &  thresholdTime 
)

Given a measured duration and a threshold time, tell the user whether we succeeded or not.

Parameters
    duration        the measured inference duration.
    thresholdTime   the threshold time in milliseconds.
Returns
true if the measured time is within the threshold (or no threshold was supplied); false if the measured time exceeded the threshold.

Definition at line 48 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, arm::pipe::ConvertExternalProfilingOptions(), armnn::GetTimeDuration(), armnn::GetTimeNow(), ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InputNames, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_ModelPath, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, IRuntime::CreationOptions::m_ProfilingOptions, ExecuteNetworkParams::m_TfLiteExecutor, ExecuteNetworkParams::m_ThresholdTime, armnn::numeric_cast(), DelegateOptions::SetExternalProfilingParams(), armnnDelegate::TfLiteArmnnDelegateCreate(), and armnnDelegate::TfLiteArmnnDelegateDelete().

Referenced by MainImpl().

50 {
51  ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
52  << std::fixed << duration.count() << " ms\n";
53  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
54  if (thresholdTime != 0.0)
55  {
56  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
57  << std::fixed << thresholdTime << " ms";
58  auto thresholdMinusInference = thresholdTime - duration.count();
59  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
60  << std::fixed << thresholdMinusInference << " ms" << "\n";
61  if (thresholdMinusInference < 0)
62  {
63  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
64  ARMNN_LOG(fatal) << errorMessage;
65  return false;
66  }
67  }
68  return true;
69 }
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
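
A minimal usage sketch (not part of the generated listing above): time an arbitrary piece of work with armnn::GetTimeNow()/armnn::GetTimeDuration() and hand the result to CheckInferenceTimeThreshold(), as MainImpl() does for each inference. RunSomeInference() and the 40.0 ms threshold are placeholders for illustration only.

// Hedged sketch: compare a measured duration against a user-style threshold.
const auto start = armnn::GetTimeNow();
RunSomeInference();                                   // placeholder for the timed work
const auto duration = armnn::GetTimeDuration(start);  // std::chrono::duration<double, std::milli>

const double thresholdMs = 40.0;                      // 0.0 would mean "no threshold supplied"
if (!CheckInferenceTimeThreshold(duration, thresholdMs))
{
    // The measured time exceeded the threshold; ExecuteNetwork logs this as fatal.
}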

◆ main()

int main ( int  argc,
const char *  argv[] 
)

Definition at line 996 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, ExecuteNetworkParams::ArmNNTfLiteParser, armnn::ConfigureLogging(), IRuntime::Create(), armnn::Debug, armnn::Info, ExecuteNetworkParams::m_EnableProfiling, ProgramOptions::m_ExNetParams, ExecuteNetworkParams::m_ModelFormat, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, ProgramOptions::m_RuntimeOptions, ExecuteNetworkParams::m_TfLiteExecutor, ProgramOptions::ParseOptions(), and ExecuteNetworkParams::TfliteInterpreter.

997 {
998  // Configures logging for both the ARMNN library and this test program.
 999  #ifdef NDEBUG
1000  armnn::LogSeverity level = armnn::LogSeverity::Info;
1001  #else
1002  armnn::LogSeverity level = armnn::LogSeverity::Debug;
1003  #endif
1004  armnn::ConfigureLogging(true, true, level);
1005 
1006 
1007  // Get ExecuteNetwork parameters and runtime options from command line
1008  // This might throw an InvalidArgumentException if the user provided invalid inputs
1009  ProgramOptions ProgramOptions;
1010  try {
1011  ProgramOptions.ParseOptions(argc, argv);
1012  } catch (const std::exception &e){
1013  ARMNN_LOG(fatal) << e.what();
1014  return EXIT_FAILURE;
1015  }
1016 
1017  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
1018  ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
1019  && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
1020  {
1021  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
1022  return EXIT_FAILURE;
1023  }
1024 
1025  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
1026 
1027  // Forward to implementation based on the parser type
1028  if (modelFormat.find("armnn") != std::string::npos)
1029  {
1030  #if defined(ARMNN_SERIALIZER)
1031  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1032  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
1033  #else
1034  ARMNN_LOG(fatal) << "Not built with serialization support.";
1035  return EXIT_FAILURE;
1036  #endif
1037  }
1038  else if (modelFormat.find("onnx") != std::string::npos)
1039  {
1040  #if defined(ARMNN_ONNX_PARSER)
1041  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1042  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
1043  #else
1044  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
1045  return EXIT_FAILURE;
1046  #endif
1047  }
1048  else if(modelFormat.find("tflite") != std::string::npos)
1049  {
1050  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
1051  {
1052  #if defined(ARMNN_TF_LITE_PARSER)
1053  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1054  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
1055  #else
1056  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
1057  return EXIT_FAILURE;
1058  #endif
1059  }
1060  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
1061      ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
1062      ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
1063      ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
1064  {
1065  #if defined(ARMNN_TF_LITE_DELEGATE)
1066  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
1067  #else
1068  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
1069  return EXIT_FAILURE;
1070  #endif
1071  }
1072  }
1073  else
1074  {
1075  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
1076  << "'. Please include 'tflite' or 'onnx'";
1077  return EXIT_FAILURE;
1078  }
1079 }
ExecuteNetworkParams m_ExNetParams
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:49
void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
Configures the logging behaviour of the ARMNN library.
Definition: Utils.cpp:18
armnn::IRuntime::CreationOptions m_RuntimeOptions
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
void ParseOptions(int ac, const char *av[])
Parses program options from the command line or another source and stores the values in member variab...
Holds and parses program options for the ExecuteNetwork application.
LogSeverity
Definition: Utils.hpp:14
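
A condensed sketch of the setup main() performs before dispatching to a parser-specific MainImpl(). It uses only the calls documented above (armnn::ConfigureLogging() and ProgramOptions::ParseOptions()); argc/argv are assumed to come from the usual program entry point.

// Hedged sketch of main()'s setup: pick a log severity, configure logging,
// then parse the command line into a ProgramOptions instance.
#ifdef NDEBUG
    armnn::LogSeverity level = armnn::LogSeverity::Info;
#else
    armnn::LogSeverity level = armnn::LogSeverity::Debug;
#endif
armnn::ConfigureLogging(true, true, level);    // print to standard output and to debug output

ProgramOptions options;                        // holds ExecuteNetworkParams and runtime options
try
{
    options.ParseOptions(argc, argv);          // throws on invalid command line arguments
}
catch (const std::exception& e)
{
    ARMNN_LOG(fatal) << e.what();
    return EXIT_FAILURE;
}

// options.m_ExNetParams.m_ModelFormat ("armnn", "onnx" or "tflite") then determines
// which MainImpl<TParser, TDataType> instantiation main() forwards to.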

◆ MainImpl()

int MainImpl ( const ExecuteNetworkParams params,
const std::shared_ptr< armnn::IRuntime > &  runtime = nullptr 
)

Definition at line 379 of file ExecuteNetwork.cpp.

References ARMNN_LOG, CheckInferenceTimeThreshold(), InferenceModel< IParser, TDataType >::CreateWorkingMemHandle(), armnn::Float32, InferenceModel< IParser, TDataType >::GetInputQuantizationParams(), InferenceModel< IParser, TDataType >::GetInputSize(), AsyncCallbackManager::GetNewCallback(), AsyncCallbackManager::GetNotifiedCallback(), InferenceModel< IParser, TDataType >::GetOutputBindingInfo(), InferenceModel< IParser, TDataType >::GetOutputBindingInfos(), InferenceModel< IParser, TDataType >::GetOutputSize(), armnn::GetTimeDuration(), armnn::GetTimeNow(), ExecuteNetworkParams::m_AllowExpandedDims, Params::m_AllowExpandedDims, Params::m_AsyncEnabled, ExecuteNetworkParams::m_CachedNetworkFilePath, Params::m_CachedNetworkFilePath, ExecuteNetworkParams::m_ComputeDevices, Params::m_ComputeDevices, ExecuteNetworkParams::m_Concurrent, ExecuteNetworkParams::m_DequantizeOutput, ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_DynamicBackendsPath, Params::m_DynamicBackendsPath, ExecuteNetworkParams::m_EnableBf16TurboMode, Params::m_EnableBf16TurboMode, ExecuteNetworkParams::m_EnableFastMath, Params::m_EnableFastMath, ExecuteNetworkParams::m_EnableFp16TurboMode, Params::m_EnableFp16TurboMode, ExecuteNetworkParams::m_EnableLayerDetails, ExecuteNetworkParams::m_EnableProfiling, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_ImportInputsIfAligned, Params::m_ImportInputsIfAligned, ExecuteNetworkParams::m_InferOutputShape, Params::m_InferOutputShape, Params::m_InputBindings, ExecuteNetworkParams::m_InputNames, Params::m_InputShapes, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_IsModelBinary, Params::m_IsModelBinary, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_MLGOTuningFilePath, Params::m_MLGOTuningFilePath, ExecuteNetworkParams::m_ModelPath, Params::m_ModelPath, ExecuteNetworkParams::m_NumberOfThreads, Params::m_NumberOfThreads, Params::m_OutputBindings, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, Params::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, Params::m_OutputDetailsToStdOut, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, ExecuteNetworkParams::m_ParseUnsupported, Params::m_ParseUnsupported, ExecuteNetworkParams::m_PrintIntermediate, Params::m_PrintIntermediateLayers, ExecuteNetworkParams::m_QuantizeInput, ExecuteNetworkParams::m_ReuseBuffers, ExecuteNetworkParams::m_SaveCachedNetwork, Params::m_SaveCachedNetwork, ExecuteNetworkParams::m_SubgraphId, Params::m_SubgraphId, ExecuteNetworkParams::m_ThreadPoolSize, Params::m_ThreadPoolSize, ExecuteNetworkParams::m_ThresholdTime, Params::m_VisualizePostOptimizationModel, PopulateTensorWithData(), armnn::QAsymmS8, armnn::QAsymmU8, InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), armnn::Signed32, and Exception::what().
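
Before the full definition below, a hedged sketch of calling MainImpl() programmatically with a hand-built ExecuteNetworkParams rather than via ProgramOptions::ParseOptions(). The field names come from the References above; "model.tflite", the CpuRef backend and the single iteration are placeholder choices, and in practice ProgramOptions fills in and validates many more fields (input/output names, types and data files).

// Hedged sketch: drive MainImpl() directly for a binary TfLite model.
ExecuteNetworkParams params;
params.m_ModelPath      = "model.tflite";      // placeholder path
params.m_IsModelBinary  = true;
params.m_ComputeDevices = { "CpuRef" };        // reference backend, for illustration
params.m_Iterations     = 1;
params.m_ThresholdTime  = 0.0;                 // 0.0 disables the threshold check

armnn::IRuntime::CreationOptions runtimeOptions;
std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(runtimeOptions));

int status = MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime);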

381 {
382  using namespace std::chrono;
383 
384  std::vector<std::vector<armnnUtils::TContainer>> inputs;
385  std::vector<std::vector<armnnUtils::TContainer>> outputs;
386 
387  try
388  {
389  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
390  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
391  inferenceModelParams.m_ModelPath = params.m_ModelPath;
392  inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims;
393  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
394  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
395  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
396  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
397  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
398  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
399  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
400  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
401  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
402  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
403  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
404  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
405  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
406  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
407  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
408  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
409  inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned;
410 
411  for(const std::string& inputName: params.m_InputNames)
412  {
413  inferenceModelParams.m_InputBindings.push_back(inputName);
414  }
415 
416  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
417  {
418  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
419  }
420 
421  for(const std::string& outputName: params.m_OutputNames)
422  {
423  inferenceModelParams.m_OutputBindings.push_back(outputName);
424  }
425 
426  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
427  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
428  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
429 
430  InferenceModel<TParser, TDataType> model(inferenceModelParams,
431  params.m_EnableProfiling,
432  params.m_DynamicBackendsPath,
433  runtime);
434 
435  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
436 
 437  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
 438      armnn::MakeOptional<QuantizationParams>(
 439          model.GetInputQuantizationParams()) :
 440      armnn::EmptyOptional();
 441 
442  if (params.m_InputTensorDataFilePaths.size() > numInputs)
443  {
444  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
445  << "for each input. The user provided "
446  << params.m_InputTensorDataFilePaths.size()
447  << " input-tensor-data file/s which will be used to fill the input/s.\n";
448  }
449 
450  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
451 
452  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
453  // result tensors for each iteration. It is possible for the user to specify a type that does not match
454  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
455  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
456  // smaller one. Issue a warning to highlight the potential problem.
457  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
458  {
459  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
460  switch (type)
461  {
462  // --output-type only supports float, int, qasymms8 or qasymmu8.
 463  case armnn::DataType::Float32:
 464  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
465  {
466  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The "
467  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
468  ". This may cause unexpected problems or random failures.";
469  }
470  break;
 471  case armnn::DataType::QAsymmU8:
 472  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
473  {
474  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The "
475  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
476  ". This may cause unexpected problems or random failures.";
477  }
478  break;
 479  case armnn::DataType::Signed32:
 480  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
481  {
482  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The "
483  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
484  ". This may cause unexpected problems or random failures.";
485  }
486  break;
 487  case armnn::DataType::QAsymmS8:
 488  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
489  {
490  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The "
491  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
492  ". This may cause unexpected problems or random failures.";
493  }
494  break;
495  default:
496  break;
497  }
498  }
499 
500  if (!params.m_ReuseBuffers)
501  {
502  for (unsigned int j = 0; j < params.m_Iterations; ++j)
503  {
504  std::vector<armnnUtils::TContainer> inputDataContainers;
505  for (unsigned int i = 0; i < numInputs; ++i)
506  {
507  // If there are fewer input files given than required for the execution of
508  // params.m_Iterations we simply start with the first input file again
509  size_t inputFileIndex = j * numInputs + i;
510  if (!params.m_InputTensorDataFilePaths.empty())
511  {
512  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
513  }
514 
 515  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
 516      armnn::EmptyOptional() :
 517      armnn::MakeOptional<std::string>(
518  params.m_InputTensorDataFilePaths.at(
519  inputFileIndex));
520 
521  unsigned int numElements = model.GetInputSize(i);
522  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
523  {
524  // If the user has provided a tensor shape for the current input,
525  // override numElements
526  numElements = params.m_InputTensorShapes[i]->GetNumElements();
527  }
528 
529  armnnUtils::TContainer tensorData;
530  PopulateTensorWithData(tensorData,
531  numElements,
532  params.m_InputTypes[i],
533  qParams,
534  dataFile);
535 
536  inputDataContainers.push_back(tensorData);
537  }
538  inputs.push_back(inputDataContainers);
539  }
540 
541  for (unsigned int j = 0; j < params.m_Iterations; ++j)
542  {
543  std::vector<armnnUtils::TContainer> outputDataContainers;
544  for (unsigned int i = 0; i < numOutputs; ++i)
545  {
546  if (params.m_OutputTypes[i].compare("float") == 0)
547  {
548  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
549  }
550  else if (params.m_OutputTypes[i].compare("int") == 0)
551  {
552  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
553  }
554  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
555  params.m_OutputTypes[i].compare("qasymmu8") == 0)
556  {
557  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
558  }
559  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
560  {
561  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
562  }
563  else
564  {
565  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
566  return EXIT_FAILURE;
567  }
568  }
569  outputs.push_back(outputDataContainers);
570  }
571  }
572  if (params.m_Iterations > 1)
573  {
574  std::stringstream msg;
575  msg << "Network will be executed " << params.m_Iterations;
576  if (params.m_Concurrent)
577  {
578  msg << " times in an asynchronous manner. ";
579  }
580  else
581  {
582  msg << " times successively. ";
583  }
584  msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
585  "cover each execution.";
586  ARMNN_LOG(info) << msg.str();
587  }
588 
589  // Synchronous execution
590  if (!params.m_Concurrent && !params.m_ReuseBuffers)
591  {
592  for (size_t x = 0; x < params.m_Iterations; x++)
593  {
594  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
595  auto inference_duration = model.Run(inputs[x], outputs[x]);
596 
597  if (params.m_GenerateTensorData)
598  {
599  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
600  }
601  if (params.m_DontPrintOutputs)
602  {
603  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
604  }
605 
606  // Print output tensors
607  const auto& infosOut = model.GetOutputBindingInfos();
608  for (size_t i = 0; i < numOutputs; i++)
609  {
610  const armnn::TensorInfo& infoOut = infosOut[i].second;
611 
612  // We've made sure before that the number of output files either equals numOutputs, in which
613  // case we override those files when processing the results of each iteration (only the result
614  // of the last iteration will be stored), or there are enough
615  // output files for each output of each iteration.
616  size_t outputFileIndex = x * numOutputs + i;
617  if (!params.m_OutputTensorFiles.empty())
618  {
619  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
620  ARMNN_LOG(info) << "Writing output " << i << " named: '"
621  << inferenceModelParams.m_OutputBindings[i]
622  << "' of iteration: " << x+1 << " to file: '"
623  << params.m_OutputTensorFiles[outputFileIndex] << "'";
624  }
625  auto outputTensorFile = params.m_OutputTensorFiles.empty()
626  ? ""
627  : params.m_OutputTensorFiles[outputFileIndex];
628 
629  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
630  infoOut,
631  outputTensorFile,
632  params.m_DequantizeOutput,
633  !params.m_DontPrintOutputs);
634  mapbox::util::apply_visitor(printer, outputs[x][i]);
635  }
636 
637  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
638  << std::fixed << inference_duration.count() << " ms\n";
639 
640  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
641  if (params.m_ThresholdTime != 0.0)
642  {
643  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
644  << std::fixed << params.m_ThresholdTime << " ms";
645  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
646  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
647  << std::fixed << thresholdMinusInference << " ms" << "\n";
648 
649  if (thresholdMinusInference < 0)
650  {
651  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
652  ARMNN_LOG(fatal) << errorMessage;
653  }
654  }
655  }
656  }
657  // Synchronous Execution using a single buffer for input and output data
658  else if(!params.m_Concurrent)
659  {
660  std::vector<armnnUtils::TContainer> input;
661  std::vector<armnnUtils::TContainer> output;
662 
663  for (unsigned int i = 0; i < numInputs; ++i)
664  {
665  // If there are fewer input files given than required for the execution of
666  // params.m_Iterations we simply start with the first input file again
667  size_t inputFileIndex = numInputs + i;
668  if (!params.m_InputTensorDataFilePaths.empty())
669  {
670  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
671  }
672 
 673  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
 674      armnn::EmptyOptional() :
 675      armnn::MakeOptional<std::string>(
676  params.m_InputTensorDataFilePaths.at(
677  inputFileIndex));
678 
679  unsigned int numElements = model.GetInputSize(i);
680  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
681  {
682  // If the user has provided a tensor shape for the current input,
683  // override numElements
684  numElements = params.m_InputTensorShapes[i]->GetNumElements();
685  }
686 
687  armnnUtils::TContainer tensorData;
688  PopulateTensorWithData(tensorData,
689  numElements,
690  params.m_InputTypes[i],
691  qParams,
692  dataFile);
693 
694  input.push_back(tensorData);
695  }
696 
697  for (unsigned int i = 0; i < numOutputs; ++i)
698  {
699  if (params.m_OutputTypes[i].compare("float") == 0)
700  {
701  output.push_back(std::vector<float>(model.GetOutputSize(i)));
702  } else if (params.m_OutputTypes[i].compare("int") == 0) {
703  output.push_back(std::vector<int>(model.GetOutputSize(i)));
704  } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
705  params.m_OutputTypes[i].compare("qasymmu8") == 0)
706  {
707  output.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
708  } else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
709  {
710  output.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
711  } else {
712  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
713  return EXIT_FAILURE;
714  }
715  }
716 
717  std::vector<std::chrono::duration<double, std::milli>> timings;
718  timings.reserve(params.m_Iterations);
719  for (size_t x = 0; x < params.m_Iterations; x++)
720  {
721  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
722  auto inference_duration = model.Run(input, output);
723  timings.push_back(inference_duration);
724  }
725 
726  if (params.m_GenerateTensorData)
727  {
728  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
729  }
730  if (params.m_DontPrintOutputs)
731  {
732  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
733  }
734 
735  // Print output. This only needs to happen once as input is the same for each iteration.
736  const auto &infosOut = model.GetOutputBindingInfos();
737  for (size_t i = 0; i < numOutputs; i++)
738  {
739  const armnn::TensorInfo &infoOut = infosOut[i].second;
740 
741  // We've made sure before that the number of output files either equals numOutputs, in which
742  // case we override those files when processing the results of each iteration (only the result
743  // of the last iteration will be stored), or there are enough
744  // output files for each output of each iteration.
745  size_t outputFileIndex = numOutputs + i;
746  if (!params.m_OutputTensorFiles.empty())
747  {
748  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
749  ARMNN_LOG(info) << "Writing output " << i << " named: '"
750  << inferenceModelParams.m_OutputBindings[i] <<" to file: '"
751  << params.m_OutputTensorFiles[outputFileIndex] << "'";
752  }
753  auto outputTensorFile = params.m_OutputTensorFiles.empty()
754  ? ""
755  : params.m_OutputTensorFiles[outputFileIndex];
756 
757  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
758  infoOut,
759  outputTensorFile,
760  params.m_DequantizeOutput,
761  !params.m_DontPrintOutputs);
762  mapbox::util::apply_visitor(printer, output[i]);
763  }
764 
765  for(auto inference: timings)
766  {
767 
768  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
769  << std::fixed << inference.count() << " ms\n";
770 
771  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
772  if (params.m_ThresholdTime != 0.0)
773  {
774  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
775  << std::fixed << params.m_ThresholdTime << " ms";
776  auto thresholdMinusInference = params.m_ThresholdTime - inference.count();
777  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
778  << std::fixed << thresholdMinusInference << " ms" << "\n";
779 
780  if (thresholdMinusInference < 0)
781  {
782  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
783  ARMNN_LOG(fatal) << errorMessage;
784  }
785  }
786  }
787  }
788 
789  // Asynchronous execution using the Arm NN thread pool
790  else if (params.m_ThreadPoolSize >= 1)
791  {
792  try
793  {
794  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
795  armnn::AsyncCallbackManager callbackManager;
796  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
797 
798  // Declare the latest and earliest inference times here to be used when calculating overall time
799  std::chrono::high_resolution_clock::time_point earliestStartTime;
800  std::chrono::high_resolution_clock::time_point latestEndTime =
801  std::chrono::high_resolution_clock::now();
802 
803  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
804  // LoadedNetwork with each scheduled inference having a specific priority
805  for (size_t i = 0; i < params.m_Iterations; ++i)
806  {
807  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
808  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
809  model.RunAsync(inputs[i], outputs[i], cb);
810  }
811 
812  // Check the results
813  unsigned int j = 0;
814  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
815  {
816  auto cb = callbackManager.GetNotifiedCallback();
817 
818  // Get the results
819  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
820  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
821  auto inferenceDuration = endTime - startTime;
822 
823  if (latestEndTime < cb->GetEndTime())
824  {
825  latestEndTime = cb->GetEndTime();
826  }
827 
828  if (earliestStartTime.time_since_epoch().count() == 0)
829  {
830  earliestStartTime = cb->GetStartTime();
831  }
832  else if (earliestStartTime > cb->GetStartTime())
833  {
834  earliestStartTime = cb->GetStartTime();
835  }
836 
837  if (params.m_GenerateTensorData)
838  {
839  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
840  }
841  if (params.m_DontPrintOutputs)
842  {
843  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
844  }
845 
846  // Print output tensors
847  const auto& infosOut = model.GetOutputBindingInfos();
848  for (size_t i = 0; i < numOutputs; i++)
849  {
850  // We've made sure before that the number of output files either equals numOutputs, in which
851  // case we override those files when processing the results of each iteration (only the
852  // result of the last iteration will be stored), or there are enough
853  // output files for each output of each iteration.
854  size_t outputFileIndex = iteration * numOutputs + i;
855  if (!params.m_OutputTensorFiles.empty())
856  {
857  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
858  ARMNN_LOG(info) << "Writing output " << i << " named: '"
859  << inferenceModelParams.m_OutputBindings[i]
860  << "' of iteration: " << iteration+1 << " to file: '"
861  << params.m_OutputTensorFiles[outputFileIndex] << "'";
862  }
863 
864  const armnn::TensorInfo& infoOut = infosOut[i].second;
865  auto outputTensorFile = params.m_OutputTensorFiles.empty()
866  ? ""
867  : params.m_OutputTensorFiles[outputFileIndex];
868 
869  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
870  infoOut,
871  outputTensorFile,
872  params.m_DequantizeOutput,
873  !params.m_DontPrintOutputs);
874  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
875  }
876 
877  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
878  ++j;
879  }
880  //print duration difference between overallStartTime and overallEndTime
881  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
882  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
883  auto totalInferenceDuration = overallEndTime - overallStartTime;
884  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
885  << std::fixed << totalInferenceDuration.count() << " ms\n";
886  }
887  catch (const armnn::Exception& e)
888  {
889  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
890  return EXIT_FAILURE;
891  }
892  }
893  // Asynchronous execution using std::launch::async
894  else
895  {
896  try
897  {
898  ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n";
899  std::vector<std::future<std::tuple<unsigned int,
900  std::chrono::duration<double, std::milli>>>> inferenceResults;
901  inferenceResults.reserve(params.m_Iterations);
902 
903  // Create WorkingMemHandles for each inference
904  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
905  workingMemHandles.reserve(params.m_Iterations);
906  for (unsigned int i = 0; i < params.m_Iterations; ++i)
907  {
908  workingMemHandles.push_back(model.CreateWorkingMemHandle());
909  }
910 
911  // Run each inference in its own thread
912  // start a timer
913  const auto start_time = armnn::GetTimeNow();
914  for (unsigned int i = 0; i < params.m_Iterations; ++i)
915  {
916  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
917 
918  inferenceResults.push_back(std::async(
919  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
920  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
921  }
922  ));
923  }
924 
925  // Check the results
926  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
927  {
928  // Get the results
929  auto inferenceResult = inferenceResults[j].get();
930  auto inferenceDuration = std::get<1>(inferenceResult);
931  auto inferenceID = std::get<0>(inferenceResult);
932 
933  if (params.m_GenerateTensorData)
934  {
935  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
936  }
937  if (params.m_DontPrintOutputs)
938  {
939  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
940  }
941 
942  // Print output tensors
943  const auto& infosOut = model.GetOutputBindingInfos();
944  for (size_t i = 0; i < numOutputs; i++)
945  {
946  // We've made sure before that the number of output files either equals numOutputs, in which
947  // case we override those files when processing the results of each iteration (only the
948  // result of the last iteration will be stored), or there are enough
949  // output files for each output of each iteration.
950  size_t outputFileIndex = j * numOutputs + i;
951  if (!params.m_OutputTensorFiles.empty())
952  {
953  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
954  ARMNN_LOG(info) << "Writing output " << i << " named: '"
955  << inferenceModelParams.m_OutputBindings[i]
956  << "' of iteration: " << j+1 << " to file: '"
957  << params.m_OutputTensorFiles[outputFileIndex] << "'";
958  }
959  const armnn::TensorInfo& infoOut = infosOut[i].second;
960  auto outputTensorFile = params.m_OutputTensorFiles.empty()
961  ? ""
962  : params.m_OutputTensorFiles[outputFileIndex];
963 
964  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
965  infoOut,
966  outputTensorFile,
967  params.m_DequantizeOutput,
968  !params.m_DontPrintOutputs);
969  mapbox::util::apply_visitor(printer, outputs[j][i]);
970  }
971  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
972  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
973  }
974  // finish timer
975  const auto duration = armnn::GetTimeDuration(start_time);
976  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
977  << std::fixed << duration.count() << " ms\n";
978  }
979  catch (const armnn::Exception& e)
980  {
981  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
982  return EXIT_FAILURE;
983  }
984  }
985  }
986  catch (const armnn::Exception& e)
987  {
988  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
989  return EXIT_FAILURE;
990  }
991 
992  return EXIT_SUCCESS;
993 }
std::vector< std::string > m_InputTypes
std::chrono::duration< double, std::milli > GetTimeDuration(std::chrono::high_resolution_clock::time_point start_time)
Definition: Timer.hpp:19
std::shared_ptr< AsyncExecutionCallback > GetNewCallback()
std::vector< TensorShapePtr > m_InputTensorShapes
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
#define ARMNN_LOG(severity)
Definition: Logging.hpp:205
bool CheckInferenceTimeThreshold(const std::chrono::duration< double, std::milli > &duration, const double &thresholdTime)
Given a measured duration and a threshold time, tell the user whether we succeeded or not...
std::chrono::high_resolution_clock::time_point GetTimeNow()
Definition: Timer.hpp:14
std::vector< std::string > m_OutputNames
Copyright (c) 2021 ARM Limited and Contributors.
std::vector< std::string > m_OutputTensorFiles
std::vector< std::string > m_InputBindings
std::vector< armnn::BackendId > m_ComputeDevices
std::vector< std::string > m_OutputTypes
std::vector< armnn::TensorShape > m_InputShapes
DataType
Definition: Types.hpp:48
void PopulateTensorWithData(armnnUtils::TContainer &tensorData, unsigned int numElements, const std::string &dataTypeStr, const armnn::Optional< QuantizationParams > &qParams, const armnn::Optional< std::string > &dataFile)
std::vector< std::string > m_OutputBindings
std::vector< armnn::BackendId > m_ComputeDevices
std::vector< std::string > m_InputNames
std::vector< std::string > m_InputTensorDataFilePaths
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
mapbox::util::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char >, std::vector< int8_t > > TContainer
Definition: TContainer.hpp:18
Optional< T > MakeOptional(Args &&... args)
Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> obje...
Definition: Optional.hpp:305
std::shared_ptr< AsyncExecutionCallback > GetNotifiedCallback()
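
To close, a hedged sketch of the per-input preparation step that the iteration loops in MainImpl() repeat, using the PopulateTensorWithData() signature and the armnnUtils::TContainer variant listed above. The generated float data (no data file, no quantization parameters) and the pre-existing model/inputs variables are illustrative assumptions matching the synchronous path.

// Hedged sketch: build one input container the way MainImpl() does when
// --generate-tensor-data is in effect and the input type is "float".
armnnUtils::TContainer tensorData;
unsigned int numElements = model.GetInputSize(0);   // element count for input binding 0 (model as in MainImpl)

PopulateTensorWithData(tensorData,
                       numElements,
                       "float",                     // the --input-type value for this input
                       armnn::EmptyOptional(),      // no quantization parameters
                       armnn::EmptyOptional());     // no data file, so data is generated

inputs.push_back({ tensorData });                   // one iteration's worth of inputs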