ArmNN
 21.05
ExecuteNetwork.cpp File Reference

Go to the source code of this file.

Functions

template<typename TParser , typename TDataType >
int MainImpl (const ExecuteNetworkParams &params, const std::shared_ptr< armnn::IRuntime > &runtime=nullptr)
 
int main (int argc, const char *argv[])
 

Function Documentation

◆ main()

int main ( int  argc,
const char *  argv[] 
)

Definition at line 537 of file ExecuteNetwork.cpp.

References ARMNN_LOG, ExecuteNetworkParams::ArmNNTfLiteDelegate, ExecuteNetworkParams::ArmNNTfLiteParser, armnn::ConfigureLogging(), IRuntime::Create(), armnn::Debug, armnn::Info, ProgramOptions::m_ExNetParams, ExecuteNetworkParams::m_ModelFormat, ProgramOptions::m_RuntimeOptions, ExecuteNetworkParams::m_TfLiteExecutor, and ExecuteNetworkParams::TfliteInterpreter.

538 {
539  // Configures logging for both the ARMNN library and this test program.
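540  #ifdef NDEBUG
541  armnn::LogSeverity level = armnn::LogSeverity::Info;
542  #else
543  armnn::LogSeverity level = armnn::LogSeverity::Debug;
544  #endif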
545  armnn::ConfigureLogging(true, true, level);
546 
547 
548  // Get ExecuteNetwork parameters and runtime options from command line
549  ProgramOptions ProgramOptions(argc, argv);
550 
551  // Create runtime
552  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
553 
554  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
555 
556  // Forward to implementation based on the parser type
557  if (modelFormat.find("armnn") != std::string::npos)
558  {
559  #if defined(ARMNN_SERIALIZER)
560  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
561  #else
562  ARMNN_LOG(fatal) << "Not built with serialization support.";
563  return EXIT_FAILURE;
564  #endif
565  }
566  else if (modelFormat.find("onnx") != std::string::npos)
567  {
568  #if defined(ARMNN_ONNX_PARSER)
569  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
570  #else
571  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
572  return EXIT_FAILURE;
573  #endif
574  }
575  else if(modelFormat.find("tflite") != std::string::npos)
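576  {
577  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
578  {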
579  #if defined(ARMNN_TF_LITE_PARSER)
580  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
581  #else
582  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
583  return EXIT_FAILURE;
584  #endif
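585  }
586  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
587  ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
588  ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
589  ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
590  {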
591  #if defined(ARMNN_TF_LITE_DELEGATE)
592  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime);
593  #else
594  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
595  return EXIT_FAILURE;
596  #endif
597  }
598  }
599  else
600  {
601  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
602  << "'. Please include 'tflite' or 'onnx'";
603  return EXIT_FAILURE;
604  }
605 }
ExecuteNetworkParams m_ExNetParams
static IRuntimePtr Create(const CreationOptions &options)
Definition: Runtime.cpp:37
void ConfigureLogging(bool printToStandardOutput, bool printToDebugOutput, LogSeverity severity)
Configures the logging behaviour of the ARMNN library.
Definition: Utils.cpp:18
armnn::IRuntime::CreationOptions m_RuntimeOptions
#define ARMNN_LOG(severity)
Definition: Logging.hpp:202
Holds and parses program options for the ExecuteNetwork application.
LogSeverity
Definition: Utils.hpp:13

◆ MainImpl()

int MainImpl ( const ExecuteNetworkParams &  params,
const std::shared_ptr< armnn::IRuntime > &  runtime = nullptr 
)

Definition at line 276 of file ExecuteNetwork.cpp.

References ARMNN_LOG, InferenceModel< IParser, TDataType >::CreateWorkingMemHandle(), InferenceModel< IParser, TDataType >::GetInputQuantizationParams(), InferenceModel< IParser, TDataType >::GetInputSize(), InferenceModel< IParser, TDataType >::GetOutputBindingInfos(), InferenceModel< IParser, TDataType >::GetOutputSize(), Params::m_AsyncEnabled, ExecuteNetworkParams::m_CachedNetworkFilePath, Params::m_CachedNetworkFilePath, ExecuteNetworkParams::m_ComputeDevices, Params::m_ComputeDevices, ExecuteNetworkParams::m_Concurrent, ExecuteNetworkParams::m_DequantizeOutput, ExecuteNetworkParams::m_DynamicBackendsPath, Params::m_DynamicBackendsPath, ExecuteNetworkParams::m_EnableBf16TurboMode, Params::m_EnableBf16TurboMode, ExecuteNetworkParams::m_EnableFastMath, Params::m_EnableFastMath, ExecuteNetworkParams::m_EnableFp16TurboMode, Params::m_EnableFp16TurboMode, ExecuteNetworkParams::m_EnableLayerDetails, ExecuteNetworkParams::m_EnableProfiling, ExecuteNetworkParams::m_GenerateTensorData, ExecuteNetworkParams::m_InferOutputShape, Params::m_InferOutputShape, Params::m_InputBindings, ExecuteNetworkParams::m_InputNames, Params::m_InputShapes, ExecuteNetworkParams::m_InputTensorDataFilePaths, ExecuteNetworkParams::m_InputTensorShapes, ExecuteNetworkParams::m_InputTypes, ExecuteNetworkParams::m_IsModelBinary, Params::m_IsModelBinary, ExecuteNetworkParams::m_Iterations, ExecuteNetworkParams::m_MLGOTuningFilePath, Params::m_MLGOTuningFilePath, ExecuteNetworkParams::m_ModelPath, Params::m_ModelPath, ExecuteNetworkParams::m_NumberOfThreads, Params::m_NumberOfThreads, Params::m_OutputBindings, ExecuteNetworkParams::m_OutputNames, ExecuteNetworkParams::m_OutputTensorFiles, ExecuteNetworkParams::m_OutputTypes, ExecuteNetworkParams::m_ParseUnsupported, Params::m_ParseUnsupported, ExecuteNetworkParams::m_PrintIntermediate, Params::m_PrintIntermediateLayers, ExecuteNetworkParams::m_QuantizeInput, ExecuteNetworkParams::m_SaveCachedNetwork, Params::m_SaveCachedNetwork, 
ExecuteNetworkParams::m_SimultaneousIterations, ExecuteNetworkParams::m_SubgraphId, Params::m_SubgraphId, ExecuteNetworkParams::m_ThresholdTime, Params::m_VisualizePostOptimizationModel, PopulateTensorWithData(), InferenceModel< IParser, TDataType >::Run(), InferenceModel< IParser, TDataType >::RunAsync(), and Exception::what().

278 {
279  using TContainer =
280  mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
281 
282  std::vector<std::vector<TContainer>> inputs;
283  std::vector<std::vector<TContainer>> outputs;
284 
285  try
286  {
287  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
288  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
289  inferenceModelParams.m_ModelPath = params.m_ModelPath;
290  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
291  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
292  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
293  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
294  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
295  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
296  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
297  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
298  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
299  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
300  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
301  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
302  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
303 
304  for(const std::string& inputName: params.m_InputNames)
305  {
306  inferenceModelParams.m_InputBindings.push_back(inputName);
307  }
308 
309  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
310  {
311  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
312  }
313 
314  for(const std::string& outputName: params.m_OutputNames)
315  {
316  inferenceModelParams.m_OutputBindings.push_back(outputName);
317  }
318 
319  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
320  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
321  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
322 
323  InferenceModel<TParser, TDataType> model(inferenceModelParams,
324  params.m_EnableProfiling,
325  params.m_DynamicBackendsPath,
326  runtime);
327 
328  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
329 
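330  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
331  armnn::MakeOptional<QuantizationParams>(
332  model.GetInputQuantizationParams()) :
333  armnn::EmptyOptional();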
334 
335  for(unsigned int j = 0; j < params.m_SimultaneousIterations ; ++j)
336  {
337  std::vector<TContainer> inputDataContainers;
338  for(unsigned int i = 0; i < numInputs; ++i)
339  {
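340  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
341  armnn::EmptyOptional() :
342  armnn::MakeOptional<std::string>(
343  params.m_InputTensorDataFilePaths[(j * numInputs) + i]);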
344 
345  unsigned int numElements = model.GetInputSize(i);
346  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
347  {
348  // If the user has provided a tensor shape for the current input,
349  // override numElements
350  numElements = params.m_InputTensorShapes[i]->GetNumElements();
351  }
352 
353  TContainer tensorData;
354  PopulateTensorWithData(tensorData,
355  numElements,
356  params.m_InputTypes[i],
357  qParams,
358  dataFile);
359 
360  inputDataContainers.push_back(tensorData);
361  }
362  inputs.push_back(inputDataContainers);
363  }
364 
365  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
366 
367  for (unsigned int j = 0; j < params.m_SimultaneousIterations; ++j)
368  {
369  std::vector <TContainer> outputDataContainers;
370  for (unsigned int i = 0; i < numOutputs; ++i)
371  {
372  if (params.m_OutputTypes[i].compare("float") == 0)
373  {
374  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
375  } else if (params.m_OutputTypes[i].compare("int") == 0)
376  {
377  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
378  } else if (params.m_OutputTypes[i].compare("qasymm8") == 0)
379  {
380  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
381  } else if (params.m_OutputTypes[i].compare("qsymms8") == 0)
382  {
383  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
384  } else
385  {
386  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
387  return EXIT_FAILURE;
388  }
389  }
390  outputs.push_back(outputDataContainers);
391  }
392 
393  if (!params.m_Concurrent)
394  {
395  // Synchronous Execution
396  for (size_t x = 0; x < params.m_Iterations; x++)
397  {
398  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
399  auto inference_duration = model.Run(inputs[0], outputs[0]);
400 
401  if (params.m_GenerateTensorData)
402  {
403  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
404  }
405 
406  // Print output tensors
407  const auto& infosOut = model.GetOutputBindingInfos();
408  for (size_t i = 0; i < numOutputs; i++)
409  {
410  const armnn::TensorInfo& infoOut = infosOut[i].second;
411  auto outputTensorFile = params.m_OutputTensorFiles.empty() ? "" : params.m_OutputTensorFiles[i];
412 
413  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
414  infoOut,
415  outputTensorFile,
416  params.m_DequantizeOutput);
417  mapbox::util::apply_visitor(printer, outputs[0][i]);
418  }
419 
420  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
421  << std::fixed << inference_duration.count() << " ms\n";
422 
423  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
424  if (params.m_ThresholdTime != 0.0)
425  {
426  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
427  << std::fixed << params.m_ThresholdTime << " ms";
428  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
429  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
430  << std::fixed << thresholdMinusInference << " ms" << "\n";
431 
432  if (thresholdMinusInference < 0)
433  {
434  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
435  ARMNN_LOG(fatal) << errorMessage;
436  }
437  }
438  }
439  }
440  else
441  {
442  try
443  {
444  ARMNN_LOG(info) << "Asynchronous Execution... \n";
445  std::vector<std::future<std::tuple<armnn::profiling::ProfilingGuid,
446  std::chrono::duration<double, std::milli>>>> inferenceResults;
447  inferenceResults.reserve(params.m_SimultaneousIterations);
448 
449  // Create WorkingMemHandles for each inference
450  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
451  workingMemHandles.reserve(params.m_SimultaneousIterations);
452  for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
453  {
454  workingMemHandles.push_back(model.CreateWorkingMemHandle());
455  }
456 
457  // Run each inference in its own thread
458  for (unsigned int i = 0; i < params.m_SimultaneousIterations; ++i)
459  {
460  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
461  inferenceResults.push_back(std::async(
462  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
463  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i]);
464  }
465  ));
466  }
467 
468  // Check the results
469  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
470  {
471  // Get the results
472  auto inferenceResult = inferenceResults[j].get();
473  auto inference_duration = std::get<1>(inferenceResult);
474  auto inferenceID = std::get<0>(inferenceResult);
475 
476  if (params.m_GenerateTensorData)
477  {
478  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
479  }
480 
481  // Print output tensors
482  const auto& infosOut = model.GetOutputBindingInfos();
483  for (size_t i = 0; i < numOutputs; i++)
484  {
485  const armnn::TensorInfo& infoOut = infosOut[i].second;
486  auto outputTensorFile = params.m_OutputTensorFiles.empty()
487  ? ""
488  : params.m_OutputTensorFiles[(j * numOutputs) + i];
489 
490  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
491  infoOut,
492  outputTensorFile,
493  params.m_DequantizeOutput);
494  mapbox::util::apply_visitor(printer, outputs[j][i]);
495  }
496 
497  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
498  << std::fixed << inference_duration.count() << " ms\n";
499 
500  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
501  if (params.m_ThresholdTime != 0.0)
502  {
503  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
504  << std::fixed << params.m_ThresholdTime << " ms";
505  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
506  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
507  << std::fixed << thresholdMinusInference << " ms" << "\n";
508 
509  if (thresholdMinusInference < 0)
510  {
511  ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
512  }
513  }
514  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
515 
516  }
517  }
518  catch (const armnn::Exception& e)
519  {
520  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
521  return EXIT_FAILURE;
522  }
523 
524  }
525  }
526  catch (const armnn::Exception& e)
527  {
528  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
529  return EXIT_FAILURE;
530  }
531 
532  return EXIT_SUCCESS;
533 }
std::vector< std::string > m_InputTypes
std::vector< TensorShapePtr > m_InputTensorShapes
virtual const char * what() const noexcept override
Definition: Exceptions.cpp:32
#define ARMNN_LOG(severity)
Definition: Logging.hpp:202
void PopulateTensorWithData(TContainer &tensorData, unsigned int numElements, const std::string &dataTypeStr, const armnn::Optional< QuantizationParams > &qParams, const armnn::Optional< std::string > &dataFile)
std::vector< std::string > m_OutputNames
Copyright (c) 2021 ARM Limited and Contributors.
std::vector< std::string > m_OutputTensorFiles
std::vector< std::string > m_InputBindings
std::vector< armnn::BackendId > m_ComputeDevices
std::vector< std::string > m_OutputTypes
std::vector< armnn::TensorShape > m_InputShapes
std::vector< std::string > m_OutputBindings
std::vector< armnn::BackendId > m_ComputeDevices
std::vector< std::string > m_InputNames
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
mapbox::util::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char >, std::vector< int8_t > > TContainer
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
Optional< T > MakeOptional(Args &&... args)
Utility template that constructs an object of type T in-place and wraps it inside an Optional<T> obje...
Definition: Optional.hpp:305