ArmNN 21.11
ExecuteNetwork.cpp File Reference

Go to the source code of this file.

Functions

bool CheckInferenceTimeThreshold (const std::chrono::duration< double, std::milli > &duration, const double &thresholdTime)
 Given a measured duration and a threshold time, tell the user whether we succeeded or not. More...
 
template<typename TParser , typename TDataType >
int MainImpl (const ExecuteNetworkParams &params, const std::shared_ptr< armnn::IRuntime > &runtime=nullptr)
 
int main (int argc, const char *argv[])
 

Function Documentation

◆ CheckInferenceTimeThreshold()

bool CheckInferenceTimeThreshold ( const std::chrono::duration< double, std::milli > &  duration,
const double &  thresholdTime 
)

Given a measured duration and a threshold time, tell the user whether we succeeded or not.

Parameters
    duration        the measured inference duration.
    thresholdTime   the threshold time in milliseconds.
Returns
false if the measured time exceeded the threshold, true otherwise.

Definition at line 47 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, armnn::GetTimeDuration(), armnn::GetTimeNow(), ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InputNames, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_ModelPath, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, IRuntime::CreationOptions::m_ProfilingOptions, ExecuteNetworkParams::m_TfLiteExecutor, ExecuteNetworkParams::m_ThresholdTime, armnn::numeric_cast(), DelegateOptions::SetExternalProfilingParams(), armnnDelegate::TfLiteArmnnDelegateCreate(), and armnnDelegate::TfLiteArmnnDelegateDelete().

Referenced by MainImpl().
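
A minimal usage sketch (not part of ExecuteNetwork.cpp) showing how this helper is typically driven: the elapsed time is measured with the armnn::GetTimeNow()/armnn::GetTimeDuration() utilities, and runInference() is a hypothetical stand-in for model.Run(inputs, outputs).

 bool RunAndCheck(const double thresholdTimeMs)
 {
     // Start a timer, run the (hypothetical) inference, then measure the elapsed time in milliseconds.
     const auto startTime = armnn::GetTimeNow();
     runInference();                                            // stand-in for model.Run(inputs, outputs)
     const auto duration  = armnn::GetTimeDuration(startTime);  // std::chrono::duration<double, std::milli>

     // Returns true unless a non-zero threshold was supplied and the measured time exceeded it,
     // in which case a fatal message is logged and false is returned.
     return CheckInferenceTimeThreshold(duration, thresholdTimeMs);
 }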

47 bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
48                                  const double& thresholdTime)
49 {
50  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
51  << std::fixed << duration.count() << " ms\n";
52  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
53  if (thresholdTime != 0.0)
54  {
55  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
56  << std::fixed << thresholdTime << " ms";
57  auto thresholdMinusInference = thresholdTime - duration.count();
58  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
59  << std::fixed << thresholdMinusInference << " ms" << "\n";
60  if (thresholdMinusInference < 0)
61  {
62  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
63  ARMNN_LOG(fatal) << errorMessage;
64  return false;
65  }
66  }
67  return true;
68 }

◆ main()

int main ( int  argc,
const char *  argv[] 
)

Definition at line 846 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, ExecuteNetworkParams::ArmNNTfLiteParser, armnn::ConfigureLogging(), IRuntime::Create(), armnn::Debug, armnn::Info, ExecuteNetworkParams::m_EnableProfiling, ProgramOptions::m_ExNetParams, ExecuteNetworkParams::m_ModelFormat, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, ProgramOptions::m_RuntimeOptions, ExecuteNetworkParams::m_TfLiteExecutor, ProgramOptions::ParseOptions(), and ExecuteNetworkParams::TfliteInterpreter.

846 int main(int argc, const char* argv[])
847 {
848  // Configures logging for both the ARMNN library and this test program.
849  #ifdef NDEBUG
850  armnn::LogSeverity level = armnn::LogSeverity::Info;
851  #else
852  armnn::LogSeverity level = armnn::LogSeverity::Debug;
853  #endif
854  armnn::ConfigureLogging(true, true, level);
855 
856 
857  // Get ExecuteNetwork parameters and runtime options from command line
858  // This might throw an InvalidArgumentException if the user provided invalid inputs
859  ProgramOptions ProgramOptions;
860  try {
861  ProgramOptions.ParseOptions(argc, argv);
862  } catch (const std::exception &e){
863  ARMNN_LOG(fatal) << e.what();
864  return EXIT_FAILURE;
865  }
866 
867  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
868       ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
869      && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
870  {
871  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
872  return EXIT_FAILURE;
873  }
874 
875  // Create runtime
876  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
877 
878  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
879 
880  // Forward to implementation based on the parser type
881  if (modelFormat.find("armnn") != std::string::npos)
882  {
883  #if defined(ARMNN_SERIALIZER)
884  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
885  #else
886  ARMNN_LOG(fatal) << "Not built with serialization support.";
887  return EXIT_FAILURE;
888  #endif
889  }
890  else if (modelFormat.find("onnx") != std::string::npos)
891  {
892  #if defined(ARMNN_ONNX_PARSER)
893  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
894  #else
895  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
896  return EXIT_FAILURE;
897  #endif
898  }
899  else if(modelFormat.find("tflite") != std::string::npos)
900  {
901  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
902  {
903  #if defined(ARMNN_TF_LITE_PARSER)
904  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
905  #else
906  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
907  return EXIT_FAILURE;
908  #endif
909  }
910  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
911               ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
912           ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
913               ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
914  {
915  #if defined(ARMNN_TF_LITE_DELEGATE)
916  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
917  #else
918  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
919  return EXIT_FAILURE;
920  #endif
921  }
922  }
923  else
924  {
925  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
926  << "'. Please include 'tflite' or 'onnx'";
927  return EXIT_FAILURE;
928  }
929 }

◆ MainImpl()

template<typename TParser , typename TDataType >
int MainImpl ( const ExecuteNetworkParams &  params,
const std::shared_ptr< armnn::IRuntime > &  runtime = nullptr 
)

Definition at line 368 of file ExecuteNetwork.cpp.

References ARMNN_LOG, CheckInferenceTimeThreshold(), InferenceModel< IParser, TDataType >::CreateWorkingMemHandle(), armnn::Float32, InferenceModel< IParser, TDataType >::GetInputQuantizationParams(), InferenceModel< IParser, TDataType >::GetInputSize(), AsyncCallbackManager::GetNewCallback(), AsyncCallbackManager::GetNotifiedCallback(), InferenceModel< IParser, TDataType >::GetOutputBindingInfo(), InferenceModel< IParser, TDataType >::GetOutputBindingInfos(), InferenceModel< IParser, TDataType >::GetOutputSize(), armnn::GetTimeDuration(), armnn::GetTimeNow(), Params::m_AsyncEnabled, ExecuteNetworkParams::m_CachedNetworkFilePath, Params::m_CachedNetworkFilePath, ExecuteNetworkParams::m_ComputeDevices, Params::m_ComputeDevices, ExecuteNetworkParams::m_Concurrent, ExecuteNetworkParams::m_DequantizeOutput, ExecuteNetworkParams::m_DontPrintOutputs, ExecuteNetworkParams::m_DynamicBackendsPath, Params::m_DynamicBackendsPath, ExecuteNetworkParams::m_EnableBf16TurboMode, Params::m_EnableBf16TurboMode, ExecuteNetworkParams::m_EnableFastMath, Params::m_EnableFastMath, ExecuteNetworkParams::m_EnableFp16TurboMode, Params::m_EnableFp16TurboMode, ExecuteNetworkParams::m_EnableLayerDetails, ExecuteNetworkParams::m_EnableProfiling, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InferOutputShape, Params::m_InferOutputShape, Params::m_InputBindings, ExecuteNetworkParams::m_InputNames, Params::m_InputShapes, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_IsModelBinary, Params::m_IsModelBinary, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_MLGOTuningFilePath, Params::m_MLGOTuningFilePath, ExecuteNetworkParams::m_ModelPath, Params::m_ModelPath, ExecuteNetworkParams::m_NumberOfThreads, Params::m_NumberOfThreads, Params::m_OutputBindings, ExecuteNetworkParams::m_OutputDetailsOnlyToStdOut, Params::m_OutputDetailsOnlyToStdOut, ExecuteNetworkParams::m_OutputDetailsToStdOut, Params::m_OutputDetailsToStdOut, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, ExecuteNetworkParams::m_ParseUnsupported, Params::m_ParseUnsupported, ExecuteNetworkParams::m_PrintIntermediate, Params::m_PrintIntermediateLayers, ExecuteNetworkParams::m_QuantizeInput, ExecuteNetworkParams::m_SaveCachedNetwork, Params::m_SaveCachedNetwork, ExecuteNetworkParams::m_SubgraphId, Params::m_SubgraphId, ExecuteNetworkParams::m_ThreadPoolSize, Params::m_ThreadPoolSize, ExecuteNetworkParams::m_ThresholdTime, Params::m_VisualizePostOptimizationModel, PopulateTensorWithData(), armnn::QAsymmS8, armnn::QAsymmU8, InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), armnn::Signed32, and Exception::what().
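
The following is an illustrative sketch (not from the ArmNN sources) of how main() typically instantiates this template for a binary TensorFlow Lite model. Only a subset of the ExecuteNetworkParams fields read by MainImpl() is set; the model path, backend names and binding names are placeholders, and default IRuntime::CreationOptions are used instead of the parsed ProgramOptions::m_RuntimeOptions.

 int RunTfLiteModelSketch()
 {
     ExecuteNetworkParams params;
     params.m_ModelPath      = "model.tflite";          // placeholder path
     params.m_IsModelBinary  = true;
     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };  // placeholder backend list
     params.m_InputNames     = { "input" };             // placeholder binding names
     params.m_OutputNames    = { "output" };
     params.m_InputTypes     = { "float" };
     params.m_OutputTypes    = { "float" };
     params.m_Iterations     = 1;

     // main() creates the runtime once and shares it with MainImpl().
     std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()));

     // The template arguments select the parser and the input data type, as in main().
     return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(params, runtime);
 }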

368 int MainImpl(const ExecuteNetworkParams& params,
369              const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
370 {
371  using namespace std::chrono;
372 
373  std::vector<std::vector<armnnUtils::TContainer>> inputs;
374  std::vector<std::vector<armnnUtils::TContainer>> outputs;
375 
376  try
377  {
378  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
379  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
380  inferenceModelParams.m_ModelPath = params.m_ModelPath;
381  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
382  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
383  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
384  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
385  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
386  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
387  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
388  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
389  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
390  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
391  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
392  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
393  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
394  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
395  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
396  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
397 
398  for(const std::string& inputName: params.m_InputNames)
399  {
400  inferenceModelParams.m_InputBindings.push_back(inputName);
401  }
402 
403  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
404  {
405  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
406  }
407 
408  for(const std::string& outputName: params.m_OutputNames)
409  {
410  inferenceModelParams.m_OutputBindings.push_back(outputName);
411  }
412 
413  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
414  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
415  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
416 
417  InferenceModel<TParser, TDataType> model(inferenceModelParams,
418  params.m_EnableProfiling,
419  params.m_DynamicBackendsPath,
420  runtime);
421 
422  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
423 
424  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
425      armnn::MakeOptional<QuantizationParams>(
426          model.GetInputQuantizationParams()) :
427      armnn::EmptyOptional();
428 
429  if (params.m_InputTensorDataFilePaths.size() > numInputs)
430  {
431  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
432  << "for each input. The user provided "
433  << params.m_InputTensorDataFilePaths.size()
434  << " input-tensor-data file/s which will be used to fill the input/s.\n";
435  }
436 
437  for(unsigned int j = 0; j < params.m_Iterations ; ++j)
438  {
439  std::vector<armnnUtils::TContainer> inputDataContainers;
440  for(unsigned int i = 0; i < numInputs; ++i)
441  {
442  // If there are less input files given than required for the execution of
443  // params.m_Iterations we simply start with the first input file again
444  size_t inputFileIndex = j * numInputs + i;
445  if (!params.m_InputTensorDataFilePaths.empty())
446  {
447  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
448  }
449 
450  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
451      armnn::EmptyOptional() :
452      armnn::MakeOptional<std::string>(
453          params.m_InputTensorDataFilePaths.at(inputFileIndex));
454 
455  unsigned int numElements = model.GetInputSize(i);
456  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
457  {
458  // If the user has provided a tensor shape for the current input,
459  // override numElements
460  numElements = params.m_InputTensorShapes[i]->GetNumElements();
461  }
462 
463  armnnUtils::TContainer tensorData;
464  PopulateTensorWithData(tensorData,
465  numElements,
466  params.m_InputTypes[i],
467  qParams,
468  dataFile);
469 
470  inputDataContainers.push_back(tensorData);
471  }
472  inputs.push_back(inputDataContainers);
473  }
474 
475  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
476 
477  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
478  // result tensors for each iteration. It is possible for the user to specify a type that does not match
479  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
480  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
481  // smaller one. Issue a warning to highlight the potential problem.
482  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
483  {
484  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
485  switch (type)
486  {
487  // --output-type only supports float, int, qasymms8 or qasymmu8.
488  case armnn::DataType::Float32:
489  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
490  {
491  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " <<
492  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
493  ". This may cause unexpected problems or random failures.";
494  }
495  break;
496  case armnn::DataType::QAsymmU8:
497  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
498  {
499  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " <<
500  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
501  ". This may cause unexpected problems or random failures.";
502  }
503  break;
504  case armnn::DataType::Signed32:
505  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
506  {
507  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " <<
508  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
509  ". This may cause unexpected problems or random failures.";
510  }
511  break;
512  case armnn::DataType::QAsymmS8:
513  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
514  {
515  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " <<
516  "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
517  ". This may cause unexpected problems or random failures.";
518  }
519  break;
520  default:
521  break;
522  }
523  }
524  for (unsigned int j = 0; j < params.m_Iterations; ++j)
525  {
526  std::vector <armnnUtils::TContainer> outputDataContainers;
527  for (unsigned int i = 0; i < numOutputs; ++i)
528  {
529  if (params.m_OutputTypes[i].compare("float") == 0)
530  {
531  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
532  }
533  else if (params.m_OutputTypes[i].compare("int") == 0)
534  {
535  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
536  }
537  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
538  params.m_OutputTypes[i].compare("qasymmu8") == 0)
539  {
540  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
541  }
542  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
543  {
544  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
545  } else
546  {
547  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
548  return EXIT_FAILURE;
549  }
550  }
551  outputs.push_back(outputDataContainers);
552  }
553 
554  if (params.m_Iterations > 1)
555  {
556  std::stringstream msg;
557  msg << "Network will be executed " << params.m_Iterations;
558  if (params.m_Concurrent)
559  {
560  msg << " times in an asynchronous manner. ";
561  }
562  else
563  {
564  msg << " times successively. ";
565  }
566  msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
567  "cover each execution.";
568  ARMNN_LOG(info) << msg.str();
569  }
570 
571  // Synchronous execution
572  if (!params.m_Concurrent)
573  {
574  for (size_t x = 0; x < params.m_Iterations; x++)
575  {
576  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
577  auto inference_duration = model.Run(inputs[x], outputs[x]);
578 
579  if (params.m_GenerateTensorData)
580  {
581  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
582  }
583  if (params.m_DontPrintOutputs)
584  {
585  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
586  }
587 
588  // Print output tensors
589  const auto& infosOut = model.GetOutputBindingInfos();
590  for (size_t i = 0; i < numOutputs; i++)
591  {
592  const armnn::TensorInfo& infoOut = infosOut[i].second;
593 
594  // We've made sure before that the number of output files either equals numOutputs, in which
595  // case we override those files when processing the results of each iteration (only the result
596  // of the last iteration will be stored), or there are enough
597  // output files for each output of each iteration.
598  size_t outputFileIndex = x * numOutputs + i;
599  if (!params.m_OutputTensorFiles.empty())
600  {
601  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
602  ARMNN_LOG(info) << "Writing output " << i << " named: '"
603  << inferenceModelParams.m_OutputBindings[i]
604  << "' of iteration: " << x+1 << " to file: '"
605  << params.m_OutputTensorFiles[outputFileIndex] << "'";
606  }
607  auto outputTensorFile = params.m_OutputTensorFiles.empty()
608  ? ""
609  : params.m_OutputTensorFiles[outputFileIndex];
610 
611  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
612  infoOut,
613  outputTensorFile,
614  params.m_DequantizeOutput,
615  !params.m_DontPrintOutputs);
616  mapbox::util::apply_visitor(printer, outputs[x][i]);
617  }
618 
619  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
620  << std::fixed << inference_duration.count() << " ms\n";
621 
622  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
623  if (params.m_ThresholdTime != 0.0)
624  {
625  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
626  << std::fixed << params.m_ThresholdTime << " ms";
627  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
628  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
629  << std::fixed << thresholdMinusInference << " ms" << "\n";
630 
631  if (thresholdMinusInference < 0)
632  {
633  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
634  ARMNN_LOG(fatal) << errorMessage;
635  }
636  }
637  }
638  }
639  // Asynchronous execution using the Arm NN thread pool
640  else if (params.m_ThreadPoolSize >= 1)
641  {
642  try
643  {
644  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
645  armnn::AsyncCallbackManager callbackManager;
646  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
647 
648  // Declare the latest and earliest inference times here to be used when calculating overall time
649  std::chrono::high_resolution_clock::time_point earliestStartTime;
650  std::chrono::high_resolution_clock::time_point latestEndTime =
651  std::chrono::high_resolution_clock::now();
652 
653  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
654  // LoadedNetwork with each scheduled inference having a specific priority
655  for (size_t i = 0; i < params.m_Iterations; ++i)
656  {
657  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
658  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
659  model.RunAsync(inputs[i], outputs[i], cb);
660  }
661 
662  // Check the results
663  unsigned int j = 0;
664  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
665  {
666  auto cb = callbackManager.GetNotifiedCallback();
667 
668  // Get the results
669  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
670  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
671  auto inferenceDuration = endTime - startTime;
672 
673  if (latestEndTime < cb->GetEndTime())
674  {
675  latestEndTime = cb->GetEndTime();
676  }
677 
678  if (earliestStartTime.time_since_epoch().count() == 0)
679  {
680  earliestStartTime = cb->GetStartTime();
681  }
682  else if (earliestStartTime > cb->GetStartTime())
683  {
684  earliestStartTime = cb->GetStartTime();
685  }
686 
687  if (params.m_GenerateTensorData)
688  {
689  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
690  }
691  if (params.m_DontPrintOutputs)
692  {
693  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
694  }
695 
696  // Print output tensors
697  const auto& infosOut = model.GetOutputBindingInfos();
698  for (size_t i = 0; i < numOutputs; i++)
699  {
700  // We've made sure before that the number of output files either equals numOutputs, in which
701  // case we override those files when processing the results of each iteration (only the
702  // result of the last iteration will be stored), or there are enough
703  // output files for each output of each iteration.
704  size_t outputFileIndex = iteration * numOutputs + i;
705  if (!params.m_OutputTensorFiles.empty())
706  {
707  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
708  ARMNN_LOG(info) << "Writing output " << i << " named: '"
709  << inferenceModelParams.m_OutputBindings[i]
710  << "' of iteration: " << iteration+1 << " to file: '"
711  << params.m_OutputTensorFiles[outputFileIndex] << "'";
712  }
713 
714  const armnn::TensorInfo& infoOut = infosOut[i].second;
715  auto outputTensorFile = params.m_OutputTensorFiles.empty()
716  ? ""
717  : params.m_OutputTensorFiles[outputFileIndex];
718 
719  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
720  infoOut,
721  outputTensorFile,
722  params.m_DequantizeOutput,
723  !params.m_DontPrintOutputs);
724  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
725  }
726 
727  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
728  ++j;
729  }
730  //print duration difference between overallStartTime and overallEndTime
731  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
732  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
733  auto totalInferenceDuration = overallEndTime - overallStartTime;
734  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
735  << std::fixed << totalInferenceDuration.count() << " ms\n";
736  }
737  catch (const armnn::Exception& e)
738  {
739  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
740  return EXIT_FAILURE;
741  }
742  }
743  // Asynchronous execution using std::launch::async
744  else
745  {
746  try
747  {
748  ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n";
749  std::vector<std::future<std::tuple<unsigned int,
750  std::chrono::duration<double, std::milli>>>> inferenceResults;
751  inferenceResults.reserve(params.m_Iterations);
752 
753  // Create WorkingMemHandles for each inference
754  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
755  workingMemHandles.reserve(params.m_Iterations);
756  for (unsigned int i = 0; i < params.m_Iterations; ++i)
757  {
758  workingMemHandles.push_back(model.CreateWorkingMemHandle());
759  }
760 
761  // Run each inference in its own thread
762  // start a timer
763  const auto start_time = armnn::GetTimeNow();
764  for (unsigned int i = 0; i < params.m_Iterations; ++i)
765  {
766  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
767 
768  inferenceResults.push_back(std::async(
769  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
770  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
771  }
772  ));
773  }
774 
775  // Check the results
776  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
777  {
778  // Get the results
779  auto inferenceResult = inferenceResults[j].get();
780  auto inferenceDuration = std::get<1>(inferenceResult);
781  auto inferenceID = std::get<0>(inferenceResult);
782 
783  if (params.m_GenerateTensorData)
784  {
785  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
786  }
787  if (params.m_DontPrintOutputs)
788  {
789  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
790  }
791 
792  // Print output tensors
793  const auto& infosOut = model.GetOutputBindingInfos();
794  for (size_t i = 0; i < numOutputs; i++)
795  {
796  // We've made sure before that the number of output files either equals numOutputs, in which
797  // case we override those files when processing the results of each iteration (only the
798  // result of the last iteration will be stored), or there are enough
799  // output files for each output of each iteration.
800  size_t outputFileIndex = j * numOutputs + i;
801  if (!params.m_OutputTensorFiles.empty())
802  {
803  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
804  ARMNN_LOG(info) << "Writing output " << i << " named: '"
805  << inferenceModelParams.m_OutputBindings[i]
806  << "' of iteration: " << j+1 << " to file: '"
807  << params.m_OutputTensorFiles[outputFileIndex] << "'";
808  }
809  const armnn::TensorInfo& infoOut = infosOut[i].second;
810  auto outputTensorFile = params.m_OutputTensorFiles.empty()
811  ? ""
812  : params.m_OutputTensorFiles[outputFileIndex];
813 
814  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
815  infoOut,
816  outputTensorFile,
817  params.m_DequantizeOutput,
818  !params.m_DontPrintOutputs);
819  mapbox::util::apply_visitor(printer, outputs[j][i]);
820  }
821  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
822  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
823  }
824  // finish timer
825  const auto duration = armnn::GetTimeDuration(start_time);
826  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
827  << std::fixed << duration.count() << " ms\n";
828  }
829  catch (const armnn::Exception& e)
830  {
831  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
832  return EXIT_FAILURE;
833  }
834  }
835  }
836  catch (const armnn::Exception& e)
837  {
838  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
839  return EXIT_FAILURE;
840  }
841 
842  return EXIT_SUCCESS;
843 }