ArmNN 22.02
ExecuteNetwork.cpp File Reference

Go to the source code of this file.

Functions

bool CheckInferenceTimeThreshold (const std::chrono::duration< double, std::milli > &duration, const double &thresholdTime)
 Given a measured duration and a threshold time, tell the user whether we succeeded or not. More...
 
template<typename TParser , typename TDataType >
int MainImpl (const ExecuteNetworkParams &params, const std::shared_ptr< armnn::IRuntime > &runtime=nullptr)
 
int main (int argc, const char *argv[])
 

Function Documentation

◆ CheckInferenceTimeThreshold()

bool CheckInferenceTimeThreshold (const std::chrono::duration< double, std::milli > & duration,
                                  const double & thresholdTime)

Given a measured duration and a threshold time, tell the user whether we succeeded or not.

Parameters
    duration        the measured inference duration.
    thresholdTime   the threshold time in milliseconds.

Returns
    false if the measured time exceeded the threshold.

Definition at line 47 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, armnn::GetTimeDuration(), armnn::GetTimeNow(), ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InputNames, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_ModelPath, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, IRuntime::CreationOptions::m_ProfilingOptions, ExecuteNetworkParams::m_TfLiteExecutor, ExecuteNetworkParams::m_ThresholdTime, armnn::numeric_cast(), DelegateOptions::SetExternalProfilingParams(), armnnDelegate::TfLiteArmnnDelegateCreate(), and armnnDelegate::TfLiteArmnnDelegateDelete().

Referenced by MainImpl().

49 {
50  ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
51  << std::fixed << duration.count() << " ms\n";
52  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
53  if (thresholdTime != 0.0)
54  {
55  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
56  << std::fixed << thresholdTime << " ms";
57  auto thresholdMinusInference = thresholdTime - duration.count();
58  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
59  << std::fixed << thresholdMinusInference << " ms" << "\n";
60  if (thresholdMinusInference < 0)
61  {
62  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
63  ARMNN_LOG(fatal) << errorMessage;
64  return false;
65  }
66  }
67  return true;
68 }
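
For reference, a minimal usage sketch (not part of ExecuteNetwork.cpp): it times a hypothetical workload RunOnce() against an illustrative 10 ms budget, mirroring how MainImpl() feeds measured durations into this check.

#include <chrono>

void RunOnce();   // hypothetical workload being timed

bool RunOnceWithinBudget()
{
    const auto start = std::chrono::high_resolution_clock::now();
    RunOnce();
    const std::chrono::duration<double, std::milli> elapsed =
        std::chrono::high_resolution_clock::now() - start;
    // Logs both times and returns false if elapsed exceeded the 10 ms threshold;
    // passing 0.0 as the threshold disables the check entirely.
    return CheckInferenceTimeThreshold(elapsed, 10.0);
}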

◆ main()

int main (int argc,
          const char * argv[])

Definition at line 855 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, ExecuteNetworkParams::ArmNNTfLiteParser, armnn::ConfigureLogging(), IRuntime::Create(), armnn::Debug, armnn::Info, ExecuteNetworkParams::m_EnableProfiling, ProgramOptions::m_ExNetParams, ExecuteNetworkParams::m_ModelFormat, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, ProgramOptions::m_RuntimeOptions, ExecuteNetworkParams::m_TfLiteExecutor, ProgramOptions::ParseOptions(), and ExecuteNetworkParams::TfliteInterpreter.

856 {
857  // Configures logging for both the ARMNN library and this test program.
858  #ifdef NDEBUG
859  armnn::LogSeverity level = armnn::LogSeverity::Info;
860  #else
861  armnn::LogSeverity level = armnn::LogSeverity::Debug;
862  #endif
863  armnn::ConfigureLogging(true, true, level);
864 
865 
866  // Get ExecuteNetwork parameters and runtime options from command line
867  // This might throw an InvalidArgumentException if the user provided invalid inputs
868  ProgramOptions ProgramOptions;
869  try {
870  ProgramOptions.ParseOptions(argc, argv);
871  } catch (const std::exception &e){
872  ARMNN_LOG(fatal) << e.what();
873  return EXIT_FAILURE;
874  }
875 
876  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
877  ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
878  && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
879  {
880  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
881  return EXIT_FAILURE;
882  }
883 
884  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
885 
886  // Forward to implementation based on the parser type
887  if (modelFormat.find("armnn") != std::string::npos)
888  {
889  #if defined(ARMNN_SERIALIZER)
890  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
891  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
892  #else
893  ARMNN_LOG(fatal) << "Not built with serialization support.";
894  return EXIT_FAILURE;
895  #endif
896  }
897  else if (modelFormat.find("onnx") != std::string::npos)
898  {
899  #if defined(ARMNN_ONNX_PARSER)
900  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
901  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
902  #else
903  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
904  return EXIT_FAILURE;
905  #endif
906  }
907  else if(modelFormat.find("tflite") != std::string::npos)
908  {
909  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
910  {
911  #if defined(ARMNN_TF_LITE_PARSER)
912  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
913  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
914  #else
915  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
916  return EXIT_FAILURE;
917  #endif
918  }
919  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
920  ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
921  ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
922  ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
923  {
924  #if defined(ARMNN_TF_LITE_DELEGATE)
925  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
926  #else
927  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
928  return EXIT_FAILURE;
929  #endif
930  }
931  }
932  else
933  {
934  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
935  << "'. Please include 'tflite' or 'onnx'";
936  return EXIT_FAILURE;
937  }
938 }
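
Stripped of the build-time guards, the dispatch above reduces to the following sketch for the TfLite-parser path. It is illustrative only: it assumes a build with ARMNN_TF_LITE_PARSER defined and omits the error handling shown in main().

int RunTfLiteParserPath(int argc, const char* argv[])
{
    ProgramOptions options;               // holds ExecuteNetwork and runtime options
    options.ParseOptions(argc, argv);     // may throw on invalid arguments

    std::shared_ptr<armnn::IRuntime> runtime(
        armnn::IRuntime::Create(options.m_RuntimeOptions));

    // Parse, optimise, load and run the network as shown in MainImpl() below.
    return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(options.m_ExNetParams, runtime);
}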

◆ MainImpl()

int MainImpl (const ExecuteNetworkParams & params,
              const std::shared_ptr< armnn::IRuntime > & runtime = nullptr)

Definition at line 377 of file ExecuteNetwork.cpp.

References ARMNN_LOG, CheckInferenceTimeThreshold(), InferenceModel< IParser, TDataType >::CreateWorkingMemHandle(), armnn::Float32, InferenceModel< IParser, TDataType >::GetInputQuantizationParams(), InferenceModel< IParser, TDataType >::GetInputSize(), AsyncCallbackManager::GetNewCallback(), AsyncCallbackManager::GetNotifiedCallback(), InferenceModel< IParser, TDataType >::GetOutputBindingInfo(), InferenceModel< IParser, TDataType >::GetOutputBindingInfos(), InferenceModel< IParser, TDataType >::GetOutputSize(), armnn::GetTimeDuration(), armnn::GetTimeNow(), Params::m_AsyncEnabled, ExecuteNetworkParams::m_CachedNetworkFilePath, Params::m_CachedNetworkFilePath, ExecuteNetworkParams::m_ComputeDevices, Params::m_ComputeDevices, ExecuteNetworkParams::m_Concurrent, ExecuteNetworkParams::m_DequantizeOutput, ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_DynamicBackendsPath, Params::m_DynamicBackendsPath, ExecuteNetworkParams::m_EnableBf16TurboMode, Params::m_EnableBf16TurboMode, ExecuteNetworkParams::m_EnableFastMath, Params::m_EnableFastMath, ExecuteNetworkParams::m_EnableFp16TurboMode, Params::m_EnableFp16TurboMode, ExecuteNetworkParams::m_EnableLayerDetails, ExecuteNetworkParams::m_EnableProfiling, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InferOutputShape, Params::m_InferOutputShape, Params::m_InputBindings, ExecuteNetworkParams::m_InputNames, Params::m_InputShapes, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_IsModelBinary, Params::m_IsModelBinary, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_MLGOTuningFilePath, Params::m_MLGOTuningFilePath, ExecuteNetworkParams::m_ModelPath, Params::m_ModelPath, ExecuteNetworkParams::m_NumberOfThreads, Params::m_NumberOfThreads, Params::m_OutputBindings, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, Params::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, Params::m_OutputDetailsToStdOut, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, ExecuteNetworkParams::m_ParseUnsupported, Params::m_ParseUnsupported, ExecuteNetworkParams::m_PrintIntermediate, Params::m_PrintIntermediateLayers, ExecuteNetworkParams::m_QuantizeInput, ExecuteNetworkParams::m_SaveCachedNetwork, Params::m_SaveCachedNetwork, ExecuteNetworkParams::m_SubgraphId, Params::m_SubgraphId, ExecuteNetworkParams::m_ThreadPoolSize, Params::m_ThreadPoolSize, ExecuteNetworkParams::m_ThresholdTime, Params::m_VisualizePostOptimizationModel, PopulateTensorWithData(), armnn::QAsymmS8, armnn::QAsymmU8, InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), armnn::Signed32, and Exception::what().

379 {
380  using namespace std::chrono;
381 
382  std::vector<std::vector<armnnUtils::TContainer>> inputs;
383  std::vector<std::vector<armnnUtils::TContainer>> outputs;
384 
385  try
386  {
387  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
388  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
389  inferenceModelParams.m_ModelPath = params.m_ModelPath;
390  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
391  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
392  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
393  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
394  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
395  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
396  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
397  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
398  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
399  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
400  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
401  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
402  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
403  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
404  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
405  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
406 
407  for(const std::string& inputName: params.m_InputNames)
408  {
409  inferenceModelParams.m_InputBindings.push_back(inputName);
410  }
411 
412  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
413  {
414  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
415  }
416 
417  for(const std::string& outputName: params.m_OutputNames)
418  {
419  inferenceModelParams.m_OutputBindings.push_back(outputName);
420  }
421 
422  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
423  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
424  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
425 
426  InferenceModel<TParser, TDataType> model(inferenceModelParams,
427  params.m_EnableProfiling,
428  params.m_DynamicBackendsPath,
429  runtime);
430 
431  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
432 
433  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
434  armnn::MakeOptional<QuantizationParams>(
435  model.GetInputQuantizationParams()) :
436  armnn::EmptyOptional();
437 
438  if (params.m_InputTensorDataFilePaths.size() > numInputs)
439  {
440  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
441  << "for each input. The user provided "
442  << params.m_InputTensorDataFilePaths.size()
443  << " input-tensor-data file/s which will be used to fill the input/s.\n";
444  }
445 
446  for(unsigned int j = 0; j < params.m_Iterations ; ++j)
447  {
448  std::vector<armnnUtils::TContainer> inputDataContainers;
449  for(unsigned int i = 0; i < numInputs; ++i)
450  {
451  // If there are fewer input files given than required for the execution of
452  // params.m_Iterations we simply start with the first input file again
453  size_t inputFileIndex = j * numInputs + i;
454  if (!params.m_InputTensorDataFilePaths.empty())
455  {
456  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
457  }
458 
459  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
460  armnn::EmptyOptional() :
461  armnn::MakeOptional<std::string>(
462  params.m_InputTensorDataFilePaths.at(inputFileIndex));
463 
464  unsigned int numElements = model.GetInputSize(i);
465  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
466  {
467  // If the user has provided a tensor shape for the current input,
468  // override numElements
469  numElements = params.m_InputTensorShapes[i]->GetNumElements();
470  }
471 
472  armnnUtils::TContainer tensorData;
473  PopulateTensorWithData(tensorData,
474  numElements,
475  params.m_InputTypes[i],
476  qParams,
477  dataFile);
478 
479  inputDataContainers.push_back(tensorData);
480  }
481  inputs.push_back(inputDataContainers);
482  }
483 
484  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
485 
486  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
487  // result tensors for each iteration. It is possible for the user to specify a type that does not match
488  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
489  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
490  // smaller one. Issue a warning to highlight the potential problem.
491  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
492  {
493  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
494  switch (type)
495  {
496  // --output-type only supports float, int, qasymms8 or qasymmu8.
498  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
499  {
500  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " <<
501  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
502  ". This may cause unexpected problems or random failures.";
503  }
504  break;
506  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
507  {
508  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " <<
509  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
510  ". This may cause unexpected problemsor random failures.";
511  }
512  break;
514  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
515  {
516  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " <<
517  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
518  ". This may cause unexpected problems or random failures.";
519  }
520  break;
522  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
523  {
524  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " <<
525  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
526  ". This may cause unexpected problems or random failures.";
527  }
528  break;
529  default:
530  break;
531  }
532  }
533  for (unsigned int j = 0; j < params.m_Iterations; ++j)
534  {
535  std::vector <armnnUtils::TContainer> outputDataContainers;
536  for (unsigned int i = 0; i < numOutputs; ++i)
537  {
538  if (params.m_OutputTypes[i].compare("float") == 0)
539  {
540  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
541  }
542  else if (params.m_OutputTypes[i].compare("int") == 0)
543  {
544  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
545  }
546  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
547  params.m_OutputTypes[i].compare("qasymmu8") == 0)
548  {
549  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
550  }
551  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
552  {
553  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
554  } else
555  {
556  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
557  return EXIT_FAILURE;
558  }
559  }
560  outputs.push_back(outputDataContainers);
561  }
562 
563  if (params.m_Iterations > 1)
564  {
565  std::stringstream msg;
566  msg << "Network will be executed " << params.m_Iterations;
567  if (params.m_Concurrent)
568  {
569  msg << " times in an asynchronous manner. ";
570  }
571  else
572  {
573  msg << " times successively. ";
574  }
575  msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
576  "cover each execution.";
577  ARMNN_LOG(info) << msg.str();
578  }
579 
580  // Synchronous execution
581  if (!params.m_Concurrent)
582  {
583  for (size_t x = 0; x < params.m_Iterations; x++)
584  {
585  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
586  auto inference_duration = model.Run(inputs[x], outputs[x]);
587 
588  if (params.m_GenerateTensorData)
589  {
590  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
591  }
592  if (params.m_DontPrintOutputs)
593  {
594  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
595  }
596 
597  // Print output tensors
598  const auto& infosOut = model.GetOutputBindingInfos();
599  for (size_t i = 0; i < numOutputs; i++)
600  {
601  const armnn::TensorInfo& infoOut = infosOut[i].second;
602 
603  // We've made sure before that the number of output files either equals numOutputs, in which
604  // case we override those files when processing the results of each iteration (only the result
605  // of the last iteration will be stored), or there are enough
606  // output files for each output of each iteration.
607  size_t outputFileIndex = x * numOutputs + i;
608  if (!params.m_OutputTensorFiles.empty())
609  {
610  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
611  ARMNN_LOG(info) << "Writing output " << i << " named: '"
612  << inferenceModelParams.m_OutputBindings[i]
613  << "' of iteration: " << x+1 << " to file: '"
614  << params.m_OutputTensorFiles[outputFileIndex] << "'";
615  }
616  auto outputTensorFile = params.m_OutputTensorFiles.empty()
617  ? ""
618  : params.m_OutputTensorFiles[outputFileIndex];
619 
620  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
621  infoOut,
622  outputTensorFile,
623  params.m_DequantizeOutput,
624  !params.m_DontPrintOutputs);
625  mapbox::util::apply_visitor(printer, outputs[x][i]);
626  }
627 
628  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
629  << std::fixed << inference_duration.count() << " ms\n";
630 
631  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
632  if (params.m_ThresholdTime != 0.0)
633  {
634  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
635  << std::fixed << params.m_ThresholdTime << " ms";
636  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
637  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
638  << std::fixed << thresholdMinusInference << " ms" << "\n";
639 
640  if (thresholdMinusInference < 0)
641  {
642  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
643  ARMNN_LOG(fatal) << errorMessage;
644  }
645  }
646  }
647  }
648  // Asynchronous execution using the Arm NN thread pool
649  else if (params.m_ThreadPoolSize >= 1)
650  {
651  try
652  {
653  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
654  armnn::AsyncCallbackManager callbackManager;
655  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
656 
657  // Declare the latest and earliest inference times here to be used when calculating overall time
658  std::chrono::high_resolution_clock::time_point earliestStartTime;
659  std::chrono::high_resolution_clock::time_point latestEndTime =
660  std::chrono::high_resolution_clock::now();
661 
662  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
663  // LoadedNetwork with each scheduled inference having a specific priority
664  for (size_t i = 0; i < params.m_Iterations; ++i)
665  {
666  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
667  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
668  model.RunAsync(inputs[i], outputs[i], cb);
669  }
670 
671  // Check the results
672  unsigned int j = 0;
673  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
674  {
675  auto cb = callbackManager.GetNotifiedCallback();
676 
677  // Get the results
678  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
679  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
680  auto inferenceDuration = endTime - startTime;
681 
682  if (latestEndTime < cb->GetEndTime())
683  {
684  latestEndTime = cb->GetEndTime();
685  }
686 
687  if (earliestStartTime.time_since_epoch().count() == 0)
688  {
689  earliestStartTime = cb->GetStartTime();
690  }
691  else if (earliestStartTime > cb->GetStartTime())
692  {
693  earliestStartTime = cb->GetStartTime();
694  }
695 
696  if (params.m_GenerateTensorData)
697  {
698  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
699  }
700  if (params.m_DontPrintOutputs)
701  {
702  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
703  }
704 
705  // Print output tensors
706  const auto& infosOut = model.GetOutputBindingInfos();
707  for (size_t i = 0; i < numOutputs; i++)
708  {
709  // We've made sure before that the number of output files either equals numOutputs, in which
710  // case we override those files when processing the results of each iteration (only the
711  // result of the last iteration will be stored), or there are enough
712  // output files for each output of each iteration.
713  size_t outputFileIndex = iteration * numOutputs + i;
714  if (!params.m_OutputTensorFiles.empty())
715  {
716  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
717  ARMNN_LOG(info) << "Writing output " << i << " named: '"
718  << inferenceModelParams.m_OutputBindings[i]
719  << "' of iteration: " << iteration+1 << " to file: '"
720  << params.m_OutputTensorFiles[outputFileIndex] << "'";
721  }
722 
723  const armnn::TensorInfo& infoOut = infosOut[i].second;
724  auto outputTensorFile = params.m_OutputTensorFiles.empty()
725  ? ""
726  : params.m_OutputTensorFiles[outputFileIndex];
727 
728  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
729  infoOut,
730  outputTensorFile,
731  params.m_DequantizeOutput,
732  !params.m_DontPrintOutputs);
733  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
734  }
735 
736  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
737  ++j;
738  }
739  //print duration difference between overallStartTime and overallEndTime
740  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
741  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
742  auto totalInferenceDuration = overallEndTime - overallStartTime;
743  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
744  << std::fixed << totalInferenceDuration.count() << " ms\n";
745  }
746  catch (const armnn::Exception& e)
747  {
748  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
749  return EXIT_FAILURE;
750  }
751  }
752  // Asynchronous execution using std::launch::async
753  else
754  {
755  try
756  {
757  ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n";
758  std::vector<std::future<std::tuple<unsigned int,
759  std::chrono::duration<double, std::milli>>>> inferenceResults;
760  inferenceResults.reserve(params.m_Iterations);
761 
762  // Create WorkingMemHandles for each inference
763  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
764  workingMemHandles.reserve(params.m_Iterations);
765  for (unsigned int i = 0; i < params.m_Iterations; ++i)
766  {
767  workingMemHandles.push_back(model.CreateWorkingMemHandle());
768  }
769 
770  // Run each inference in its own thread
771  // start a timer
772  const auto start_time = armnn::GetTimeNow();
773  for (unsigned int i = 0; i < params.m_Iterations; ++i)
774  {
775  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
776 
777  inferenceResults.push_back(std::async(
778  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
779  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
780  }
781  ));
782  }
783 
784  // Check the results
785  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
786  {
787  // Get the results
788  auto inferenceResult = inferenceResults[j].get();
789  auto inferenceDuration = std::get<1>(inferenceResult);
790  auto inferenceID = std::get<0>(inferenceResult);
791 
792  if (params.m_GenerateTensorData)
793  {
794  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
795  }
796  if (params.m_DontPrintOutputs)
797  {
798  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
799  }
800 
801  // Print output tensors
802  const auto& infosOut = model.GetOutputBindingInfos();
803  for (size_t i = 0; i < numOutputs; i++)
804  {
805  // We've made sure before that the number of output files either equals numOutputs, in which
806  // case we override those files when processing the results of each iteration (only the
807  // result of the last iteration will be stored), or there are enough
808  // output files for each output of each iteration.
809  size_t outputFileIndex = j * numOutputs + i;
810  if (!params.m_OutputTensorFiles.empty())
811  {
812  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
813  ARMNN_LOG(info) << "Writing output " << i << " named: '"
814  << inferenceModelParams.m_OutputBindings[i]
815  << "' of iteration: " << j+1 << " to file: '"
816  << params.m_OutputTensorFiles[outputFileIndex] << "'";
817  }
818  const armnn::TensorInfo& infoOut = infosOut[i].second;
819  auto outputTensorFile = params.m_OutputTensorFiles.empty()
820  ? ""
821  : params.m_OutputTensorFiles[outputFileIndex];
822 
823  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
824  infoOut,
825  outputTensorFile,
826  params.m_DequantizeOutput,
827  !params.m_DontPrintOutputs);
828  mapbox::util::apply_visitor(printer, outputs[j][i]);
829  }
830  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
831  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
832  }
833  // finish timer
834  const auto duration = armnn::GetTimeDuration(start_time);
835  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
836  << std::fixed << duration.count() << " ms\n";
837  }
838  catch (const armnn::Exception& e)
839  {
840  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
841  return EXIT_FAILURE;
842  }
843  }
844  }
845  catch (const armnn::Exception& e)
846  {
847  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
848  return EXIT_FAILURE;
849  }
850 
851  return EXIT_SUCCESS;
852 }
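
One detail from the input-population loop above is worth spelling out: when fewer input-tensor-data files are supplied than params.m_Iterations * numInputs requires, the file index wraps around and the first files are reused. A small illustrative sketch of that indexing rule:

// Wrap-around file indexing as used above. With 2 inputs, 3 iterations and
// 4 files, the chosen indices per iteration are {0,1}, {2,3} and then {0,1} again.
size_t InputFileIndex(size_t iteration, size_t inputIdx, size_t numInputs, size_t numFiles)
{
    const size_t flat = iteration * numInputs + inputIdx;
    return numFiles == 0 ? flat : flat % numFiles;   // with no files given, the index is never used
}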