ArmNN 22.05
ExecuteNetwork.cpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
7 #include "ExecuteNetworkProgramOptions.hpp"
8 #include <armnn/IAsyncExecutionCallback.hpp>
9 #include <AsyncExecutionCallback.hpp>
10 
11 #include <armnn/Logging.hpp>
12 #include <armnnUtils/Filesystem.hpp>
13 #include <armnnUtils/TContainer.hpp>
14 #include <ProfilingOptionsConverter.hpp>
15 #include <InferenceTest.hpp>
16 
17 #if defined(ARMNN_SERIALIZER)
18 #include "armnnDeserializer/IDeserializer.hpp"
19 #endif
20 #if defined(ARMNN_TF_LITE_PARSER)
21 #include "armnnTfLiteParser/ITfLiteParser.hpp"
22 #endif
23 #if defined(ARMNN_ONNX_PARSER)
24 #include "armnnOnnxParser/IOnnxParser.hpp"
25 #endif
26 #if defined(ARMNN_TFLITE_DELEGATE)
27 #include <armnn_delegate.hpp>
28 #include <DelegateOptions.hpp>
29 
30 #include <tensorflow/lite/builtin_ops.h>
31 #include <tensorflow/lite/c/builtin_op_data.h>
32 #include <tensorflow/lite/c/common.h>
33 #include <tensorflow/lite/optional_debug_tools.h>
34 #include <tensorflow/lite/kernels/builtin_op_kernels.h>
35 #include <tensorflow/lite/interpreter.h>
36 #include <tensorflow/lite/kernels/register.h>
37 #endif
38 
39 #include <future>
40 
41 /**
42  * Given a measured duration and a threshold time, tell the user whether we succeeded or not.
43  *
44  * @param duration the measured inference duration.
45  * @param thresholdTime the threshold time in milliseconds.
46  * @return false if the measured time exceeded the threshold.
47  */
48 bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
49  const double& thresholdTime)
50 {
51  ARMNN_LOG(info) << "Inference time: " << std::setprecision(2)
52  << std::fixed << duration.count() << " ms\n";
53  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
54  if (thresholdTime != 0.0)
55  {
56  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
57  << std::fixed << thresholdTime << " ms";
58  auto thresholdMinusInference = thresholdTime - duration.count();
59  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
60  << std::fixed << thresholdMinusInference << " ms" << "\n";
61  if (thresholdMinusInference < 0)
62  {
63  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
64  ARMNN_LOG(fatal) << errorMessage;
65  return false;
66  }
67  }
68  return true;
69 }
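
A minimal sketch of how this helper is typically driven, as the execution paths further down in this file do: measure a duration with armnn::GetTimeNow() and armnn::GetTimeDuration(), then pass it to CheckInferenceTimeThreshold() together with the user-supplied threshold. RunSomeWorkload is a hypothetical stand-in for an inference call such as model.Run(...); <functional> is assumed for std::function.

// Sketch only: time an arbitrary callable and check it against a threshold in milliseconds.
// RunSomeWorkload is a hypothetical stand-in for an inference call; requires <functional>.
bool TimeAndCheck(const std::function<void()>& RunSomeWorkload, double thresholdTimeMs)
{
    const auto start_time = armnn::GetTimeNow();
    RunSomeWorkload();
    const std::chrono::duration<double, std::milli> duration = armnn::GetTimeDuration(start_time);

    // Logs the inference time and, when thresholdTimeMs != 0.0, whether the threshold was met.
    return CheckInferenceTimeThreshold(duration, thresholdTimeMs);
}
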
70 
71 #if defined(ARMNN_TFLITE_DELEGATE)
72 int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions)
73 {
74  // Build model and corresponding interpreter
75  using namespace tflite;
76 
77  std::unique_ptr<tflite::FlatBufferModel> model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());
78 
79  auto tfLiteInterpreter = std::make_unique<Interpreter>();
80  tflite::ops::builtin::BuiltinOpResolver resolver;
81 
82  tflite::InterpreterBuilder builder(*model, resolver);
83  builder(&tfLiteInterpreter);
84  tfLiteInterpreter->AllocateTensors();
85 
86  int status = 0;
87 
88  // Create & populate Armnn Delegate, then register it to TfLiteInterpreter
89  if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
90  {
91  // Create the Armnn Delegate
92  // Populate a DelegateOptions from the ExecuteNetworkParams.
93  armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions();
94  delegateOptions.SetExternalProfilingParams(
95  ConvertExternalProfilingOptions(runtimeOptions.m_ProfilingOptions));
96 
97  std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
98  theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
99  armnnDelegate::TfLiteArmnnDelegateDelete);
100  // Register armnn_delegate to TfLiteInterpreter
101  status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
102  if (status != kTfLiteOk)
103  {
104  ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
105  return EXIT_FAILURE;
106  }
107  }
108  else
109  {
110  std::cout << "Running on TfLite without ArmNN delegate\n";
111  }
112 
113  // Load (or generate) input data for inference
114  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData
115  ? armnn::EmptyOptional()
116  : armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths[0]);
117 
118  const size_t numInputs = params.m_InputNames.size();
119 
120  // Populate input tensor of interpreter
121  for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
122  {
123  int input = tfLiteInterpreter->inputs()[inputIndex];
124  TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;
125 
126  unsigned int inputSize = 1;
127  if (params.m_InputTensorShapes.size() > 0)
128  {
129  inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements();
130  }
131  else
132  {
133  for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
134  {
135  inputSize *= inputDims->data[dim];
136  }
137  }
138 
139  if (params.m_InputTypes[inputIndex].compare("float") == 0)
140  {
141  auto inputData = tfLiteInterpreter->typed_tensor<float>(input);
142 
143  if(inputData == NULL)
144  {
145  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
146  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
147  return EXIT_FAILURE;
148  }
149 
150  std::vector<float> tensorData;
151  PopulateTensorWithDataGeneric<float>(tensorData,
152  inputSize,
153  dataFile,
154  [](const std::string& s)
155  { return std::stof(s); });
156 
157  std::copy(tensorData.begin(), tensorData.end(), inputData);
158  }
159  else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 ||
160  params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
161  {
162  auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);
163 
164  if(inputData == NULL)
165  {
166  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
167  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
168  return EXIT_FAILURE;
169  }
170 
171  std::vector<int8_t> tensorData;
172  PopulateTensorWithDataGeneric<int8_t>(tensorData,
173  inputSize,
174  dataFile,
175  [](const std::string& s)
176  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });
177 
178  std::copy(tensorData.begin(), tensorData.end(), inputData);
179  }
180  else if (params.m_InputTypes[inputIndex].compare("int") == 0)
181  {
182  auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);
183 
184  if(inputData == NULL)
185  {
186  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
187  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
188  return EXIT_FAILURE;
189  }
190 
191  std::vector<int32_t> tensorData;
192  PopulateTensorWithDataGeneric<int32_t>(tensorData,
193  inputSize,
194  dataFile,
195  [](const std::string& s)
196  { return std::stoi(s); });
197 
198  std::copy(tensorData.begin(), tensorData.end(), inputData);
199  }
200  else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
201  params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
202  {
203  auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);
204 
205  if(inputData == NULL)
206  {
207  ARMNN_LOG(fatal) << "Input tensor is null, input type: "
208  "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
209  return EXIT_FAILURE;
210  }
211 
212  std::vector<uint8_t> tensorData;
213  PopulateTensorWithDataGeneric<uint8_t>(tensorData,
214  inputSize,
215  dataFile,
216  [](const std::string& s)
217  { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });
218 
219  std::copy(tensorData.begin(), tensorData.end(), inputData);
220  }
221  else
222  {
223  ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". ";
224  return EXIT_FAILURE;
225  }
226  }
227 
228  // Run inference, print the output of the inference
229  for (size_t x = 0; x < params.m_Iterations; x++)
230  {
231  // Start timer to record inference time in milliseconds.
232  const auto start_time = armnn::GetTimeNow();
233  // Run the inference
234  status = tfLiteInterpreter->Invoke();
235  const auto duration = armnn::GetTimeDuration(start_time);
236 
237  // The TFLite interpreter's outputs might be in a different order than the output names supplied by the user.
238  std::map<unsigned int, int> paramToTfliteOutputIndex;
239  for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex)
240  {
241  paramToTfliteOutputIndex[paramIndex] = -1;
242  for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex)
243  {
244  if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex))
245  {
246  paramToTfliteOutputIndex[paramIndex] = tfLiteIndex;
247  }
248  }
249  }
250 
251  // Print out the output
252  for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex)
253  {
254  int outputIndex = paramToTfliteOutputIndex[paramOutputIndex];
255  if (outputIndex == -1)
256  {
257  std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) <<
258  std::endl;
259  continue;
260  }
261  auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
262  TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;
263  // If we've been asked to write to a file then set a file output stream. Otherwise use stdout.
264  FILE* outputTensorFile = stdout;
265  if (!params.m_OutputTensorFiles.empty())
266  {
267  outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w");
268  if (outputTensorFile == NULL)
269  {
270  ARMNN_LOG(fatal) << "Specified output tensor file, \"" <<
271  params.m_OutputTensorFiles[outputIndex] <<
272  "\", cannot be created. Defaulting to stdout. " <<
273  "Error was: " << std::strerror(errno);
274  outputTensorFile = stdout;
275  }
276  else
277  {
278  ARMNN_LOG(info) << "Writing output '" << outputIndex << "' of iteration: " << x+1 << " to file: '"
279  << params.m_OutputTensorFiles[outputIndex] << "'";
280  }
281  }
282  long outputSize = 1;
283  for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
284  {
285  outputSize *= outputDims->data[dim];
286  }
287 
288  std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": ";
289  if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0)
290  {
291  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
292  if(tfLiteDelageOutputData == NULL)
293  {
294  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
295  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
296  return EXIT_FAILURE;
297  }
298 
299  if (!params.m_DontPrintOutputs)
300  {
301  for (int i = 0; i < outputSize; ++i)
302  {
303  fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]);
304  }
305  }
306  }
307  else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0)
308  {
309  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
310  if(tfLiteDelageOutputData == NULL)
311  {
312  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
313  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
314  return EXIT_FAILURE;
315  }
316 
317  if (!params.m_DontPrintOutputs)
318  {
319  for (int i = 0; i < outputSize; ++i)
320  {
321  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
322  }
323  }
324  }
325  else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 ||
326  params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0)
327  {
328  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
329  if(tfLiteDelageOutputData == NULL)
330  {
331  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
332  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
333  return EXIT_FAILURE;
334  }
335 
336  if (!params.m_DontPrintOutputs)
337  {
338  for (int i = 0; i < outputSize; ++i)
339  {
340  fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
341  }
342  }
343  }
344  else if (params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 ||
345  params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0)
346  {
347  auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
348  if(tfLiteDelageOutputData == NULL)
349  {
350  ARMNN_LOG(fatal) << "Output tensor is null, output type: "
351  "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect.";
352  return EXIT_FAILURE;
353  }
354 
355  if (!params.m_DontPrintOutputs)
356  {
357  for (int i = 0; i < outputSize; ++i)
358  {
359  fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]);
360  }
361  }
362  }
363  else
364  {
365  ARMNN_LOG(fatal) << "Unsupported output tensor data type "
366  "\"" << params.m_OutputTypes[paramOutputIndex] <<
367  "\". Output type can be specified with the -z argument.";
368  return EXIT_FAILURE;
369  }
370  std::cout << std::endl;
371  }
372  CheckInferenceTimeThreshold(duration, params.m_ThresholdTime);
373  }
374 
375  return status;
376 }
377 #endif
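
Stripped of input population and output printing, the delegate-registration pattern used by TfLiteDelegateMainImpl above reduces to roughly the following sketch. It reuses only calls that already appear above; "model.tflite" is a placeholder path and the CpuRef backend is an arbitrary example choice, not what ExecuteNetwork itself does (it builds DelegateOptions from its command-line parameters).

// Condensed sketch of the pattern above; not a drop-in replacement for TfLiteDelegateMainImpl.
// Assumes ARMNN_TFLITE_DELEGATE is defined. "model.tflite" and CpuRef are placeholder choices.
int RunWithArmnnDelegateSketch()
{
    std::unique_ptr<tflite::FlatBufferModel> model =
        tflite::FlatBufferModel::BuildFromFile("model.tflite");

    std::unique_ptr<tflite::Interpreter> interpreter;
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*model, resolver)(&interpreter);
    interpreter->AllocateTensors();

    // Create the ArmNN delegate and hand the graph over to it.
    armnnDelegate::DelegateOptions delegateOptions(armnn::Compute::CpuRef);
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    if (interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)) != kTfLiteOk)
    {
        return EXIT_FAILURE;
    }

    return interpreter->Invoke() == kTfLiteOk ? EXIT_SUCCESS : EXIT_FAILURE;
}
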
378 template<typename TParser, typename TDataType>
379 int MainImpl(const ExecuteNetworkParams& params,
380  const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
381 {
382  using namespace std::chrono;
383 
384  std::vector<std::vector<armnnUtils::TContainer>> inputs;
385  std::vector<std::vector<armnnUtils::TContainer>> outputs;
386 
387  try
388  {
389  // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
390  typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
391  inferenceModelParams.m_ModelPath = params.m_ModelPath;
392  inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims;
393  inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary;
394  inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices;
395  inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
396  inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate;
397  inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
398  inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported;
399  inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape;
400  inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath;
401  inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork;
402  inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath;
403  inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads;
404  inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath;
405  inferenceModelParams.m_AsyncEnabled = params.m_Concurrent;
406  inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize;
407  inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut;
408  inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut;
409  inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned;
410 
411  for(const std::string& inputName: params.m_InputNames)
412  {
413  inferenceModelParams.m_InputBindings.push_back(inputName);
414  }
415 
416  for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
417  {
418  inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
419  }
420 
421  for(const std::string& outputName: params.m_OutputNames)
422  {
423  inferenceModelParams.m_OutputBindings.push_back(outputName);
424  }
425 
426  inferenceModelParams.m_SubgraphId = params.m_SubgraphId;
427  inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
428  inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;
429 
430  InferenceModel<TParser, TDataType> model(inferenceModelParams,
431  params.m_EnableProfiling,
432  params.m_DynamicBackendsPath,
433  runtime);
434 
435  const size_t numInputs = inferenceModelParams.m_InputBindings.size();
436 
437  armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
438  armnn::MakeOptional<QuantizationParams>(
439  model.GetInputQuantizationParams()) :
440  armnn::EmptyOptional();
441 
442  if (params.m_InputTensorDataFilePaths.size() > numInputs)
443  {
444  ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
445  << "for each input. The user provided "
446  << params.m_InputTensorDataFilePaths.size()
447  << " input-tensor-data file/s which will be used to fill the input/s.\n";
448  }
449 
450  const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();
451 
452  // The user is allowed to specify the data type of each output tensor. It is used here to construct the
453  // result tensors for each iteration. It is possible for the user to specify a type that does not match
454  // the data type of the corresponding model output. It may not make sense, but it is historically allowed.
455  // The potential problem here is a buffer overrun when a larger data type is written into the space for a
456  // smaller one. Issue a warning to highlight the potential problem.
457  for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
458  {
459  armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
460  switch (type)
461  {
462  // --output-type only supports float, int, qasymms8 or qasymmu8.
463  case armnn::DataType::Float32:
464  if (params.m_OutputTypes[outputIdx].compare("float") != 0)
465  {
466  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The "
467  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
468  ". This may cause unexpected problems or random failures.";
469  }
470  break;
471  case armnn::DataType::QAsymmU8:
472  if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
473  {
474  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The "
475  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
476  ". This may cause unexpected problems or random failures.";
477  }
478  break;
479  case armnn::DataType::Signed32:
480  if (params.m_OutputTypes[outputIdx].compare("int") != 0)
481  {
482  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The "
483  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
484  ". This may cause unexpected problems or random failures.";
485  }
486  break;
487  case armnn::DataType::QAsymmS8:
488  if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
489  {
490  ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The "
491  << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] <<
492  ". This may cause unexpected problems or random failures.";
493  }
494  break;
495  default:
496  break;
497  }
498  }
499 
500  if (!params.m_ReuseBuffers)
501  {
502  for (unsigned int j = 0; j < params.m_Iterations; ++j)
503  {
504  std::vector<armnnUtils::TContainer> inputDataContainers;
505  for (unsigned int i = 0; i < numInputs; ++i)
506  {
507  // If there are fewer input files given than required for the execution of
508  // params.m_Iterations we simply start with the first input file again
509  size_t inputFileIndex = j * numInputs + i;
510  if (!params.m_InputTensorDataFilePaths.empty())
511  {
512  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
513  }
514 
515  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
516  armnn::EmptyOptional() :
517  armnn::MakeOptional<std::string>(
518  params.m_InputTensorDataFilePaths.at(
519  inputFileIndex));
520 
521  unsigned int numElements = model.GetInputSize(i);
522  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
523  {
524  // If the user has provided a tensor shape for the current input,
525  // override numElements
526  numElements = params.m_InputTensorShapes[i]->GetNumElements();
527  }
528 
529  armnnUtils::TContainer tensorData;
530  PopulateTensorWithData(tensorData,
531  numElements,
532  params.m_InputTypes[i],
533  qParams,
534  dataFile);
535 
536  inputDataContainers.push_back(tensorData);
537  }
538  inputs.push_back(inputDataContainers);
539  }
540 
541  for (unsigned int j = 0; j < params.m_Iterations; ++j)
542  {
543  std::vector<armnnUtils::TContainer> outputDataContainers;
544  for (unsigned int i = 0; i < numOutputs; ++i)
545  {
546  if (params.m_OutputTypes[i].compare("float") == 0)
547  {
548  outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
549  }
550  else if (params.m_OutputTypes[i].compare("int") == 0)
551  {
552  outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
553  }
554  else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
555  params.m_OutputTypes[i].compare("qasymmu8") == 0)
556  {
557  outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
558  }
559  else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
560  {
561  outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
562  }
563  else
564  {
565  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
566  return EXIT_FAILURE;
567  }
568  }
569  outputs.push_back(outputDataContainers);
570  }
571  }
572  if (params.m_Iterations > 1)
573  {
574  std::stringstream msg;
575  msg << "Network will be executed " << params.m_Iterations;
576  if (params.m_Concurrent)
577  {
578  msg << " times in an asynchronous manner. ";
579  }
580  else
581  {
582  msg << " times successively. ";
583  }
584  msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
585  "cover each execution.";
586  ARMNN_LOG(info) << msg.str();
587  }
588 
589  // Synchronous execution
590  if (!params.m_Concurrent && !params.m_ReuseBuffers)
591  {
592  for (size_t x = 0; x < params.m_Iterations; x++)
593  {
594  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
595  auto inference_duration = model.Run(inputs[x], outputs[x]);
596 
597  if (params.m_GenerateTensorData)
598  {
599  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
600  }
601  if (params.m_DontPrintOutputs)
602  {
603  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
604  }
605 
606  // Print output tensors
607  const auto& infosOut = model.GetOutputBindingInfos();
608  for (size_t i = 0; i < numOutputs; i++)
609  {
610  const armnn::TensorInfo& infoOut = infosOut[i].second;
611 
612  // We've made sure before that the number of output files either equals numOutputs, in which
613  // case we override those files when processing the results of each iteration (only the result
614  // of the last iteration will be stored), or there are enough
615  // output files for each output of each iteration.
616  size_t outputFileIndex = x * numOutputs + i;
617  if (!params.m_OutputTensorFiles.empty())
618  {
619  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
620  ARMNN_LOG(info) << "Writing output " << i << " named: '"
621  << inferenceModelParams.m_OutputBindings[i]
622  << "' of iteration: " << x+1 << " to file: '"
623  << params.m_OutputTensorFiles[outputFileIndex] << "'";
624  }
625  auto outputTensorFile = params.m_OutputTensorFiles.empty()
626  ? ""
627  : params.m_OutputTensorFiles[outputFileIndex];
628 
629  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
630  infoOut,
631  outputTensorFile,
632  params.m_DequantizeOutput,
633  !params.m_DontPrintOutputs);
634  mapbox::util::apply_visitor(printer, outputs[x][i]);
635  }
636 
637  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
638  << std::fixed << inference_duration.count() << " ms\n";
639 
640  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
641  if (params.m_ThresholdTime != 0.0)
642  {
643  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
644  << std::fixed << params.m_ThresholdTime << " ms";
645  auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
646  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
647  << std::fixed << thresholdMinusInference << " ms" << "\n";
648 
649  if (thresholdMinusInference < 0)
650  {
651  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
652  ARMNN_LOG(fatal) << errorMessage;
653  }
654  }
655  }
656  }
657  // Synchronous Execution using a single buffer for input and output data
658  else if(!params.m_Concurrent)
659  {
660  std::vector<armnnUtils::TContainer> input;
661  std::vector<armnnUtils::TContainer> output;
662 
663  for (unsigned int i = 0; i < numInputs; ++i)
664  {
665  // If there are fewer input files given than required for the execution of
666  // params.m_Iterations we simply start with the first input file again
667  size_t inputFileIndex = numInputs + i;
668  if (!params.m_InputTensorDataFilePaths.empty())
669  {
670  inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
671  }
672 
673  armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
674  armnn::EmptyOptional() :
675  armnn::MakeOptional<std::string>(
676  params.m_InputTensorDataFilePaths.at(
677  inputFileIndex));
678 
679  unsigned int numElements = model.GetInputSize(i);
680  if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
681  {
682  // If the user has provided a tensor shape for the current input,
683  // override numElements
684  numElements = params.m_InputTensorShapes[i]->GetNumElements();
685  }
686 
687  armnnUtils::TContainer tensorData;
688  PopulateTensorWithData(tensorData,
689  numElements,
690  params.m_InputTypes[i],
691  qParams,
692  dataFile);
693 
694  input.push_back(tensorData);
695  }
696 
697  for (unsigned int i = 0; i < numOutputs; ++i)
698  {
699  if (params.m_OutputTypes[i].compare("float") == 0)
700  {
701  output.push_back(std::vector<float>(model.GetOutputSize(i)));
702  } else if (params.m_OutputTypes[i].compare("int") == 0) {
703  output.push_back(std::vector<int>(model.GetOutputSize(i)));
704  } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
705  params.m_OutputTypes[i].compare("qasymmu8") == 0)
706  {
707  output.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
708  } else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
709  {
710  output.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
711  } else {
712  ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
713  return EXIT_FAILURE;
714  }
715  }
716 
717  std::vector<std::chrono::duration<double, std::milli>> timings;
718  timings.reserve(params.m_Iterations);
719  for (size_t x = 0; x < params.m_Iterations; x++)
720  {
721  // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
722  auto inference_duration = model.Run(input, output);
723  timings.push_back(inference_duration);
724  }
725 
726  if (params.m_GenerateTensorData)
727  {
728  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
729  }
730  if (params.m_DontPrintOutputs)
731  {
732  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
733  }
734 
735  // Print output. This only needs to happen once as input is the same for each iteration.
736  const auto &infosOut = model.GetOutputBindingInfos();
737  for (size_t i = 0; i < numOutputs; i++)
738  {
739  const armnn::TensorInfo &infoOut = infosOut[i].second;
740 
741  // We've made sure before that the number of output files either equals numOutputs, in which
742  // case we override those files when processing the results of each iteration (only the result
743  // of the last iteration will be stored), or there are enough
744  // output files for each output of each iteration.
745  size_t outputFileIndex = numOutputs + i;
746  if (!params.m_OutputTensorFiles.empty())
747  {
748  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
749  ARMNN_LOG(info) << "Writing output " << i << " named: '"
750  << inferenceModelParams.m_OutputBindings[i] <<" to file: '"
751  << params.m_OutputTensorFiles[outputFileIndex] << "'";
752  }
753  auto outputTensorFile = params.m_OutputTensorFiles.empty()
754  ? ""
755  : params.m_OutputTensorFiles[outputFileIndex];
756 
757  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
758  infoOut,
759  outputTensorFile,
760  params.m_DequantizeOutput,
761  !params.m_DontPrintOutputs);
762  mapbox::util::apply_visitor(printer, output[i]);
763  }
764 
765  for(auto inference: timings)
766  {
767 
768  ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
769  << std::fixed << inference.count() << " ms\n";
770 
771  // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
772  if (params.m_ThresholdTime != 0.0)
773  {
774  ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
775  << std::fixed << params.m_ThresholdTime << " ms";
776  auto thresholdMinusInference = params.m_ThresholdTime - inference.count();
777  ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
778  << std::fixed << thresholdMinusInference << " ms" << "\n";
779 
780  if (thresholdMinusInference < 0)
781  {
782  std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
783  ARMNN_LOG(fatal) << errorMessage;
784  }
785  }
786  }
787  }
788 
789  // Asynchronous execution using the Arm NN thread pool
790  else if (params.m_ThreadPoolSize >= 1)
791  {
792  try
793  {
794  ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
795  armnn::AsyncCallbackManager callbackManager;
796  std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;
797 
798  // Declare the latest and earliest inference times here to be used when calculating overall time
799  std::chrono::high_resolution_clock::time_point earliestStartTime;
800  std::chrono::high_resolution_clock::time_point latestEndTime =
801  std::chrono::high_resolution_clock::now();
802 
803  // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
804  // LoadedNetwork with each scheduled inference having a specific priority
805  for (size_t i = 0; i < params.m_Iterations; ++i)
806  {
807  std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
808  inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
809  model.RunAsync(inputs[i], outputs[i], cb);
810  }
811 
812  // Check the results
813  unsigned int j = 0;
814  for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
815  {
816  auto cb = callbackManager.GetNotifiedCallback();
817 
818  // Get the results
819  auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
820  auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
821  auto inferenceDuration = endTime - startTime;
822 
823  if (latestEndTime < cb->GetEndTime())
824  {
825  latestEndTime = cb->GetEndTime();
826  }
827 
828  if (earliestStartTime.time_since_epoch().count() == 0)
829  {
830  earliestStartTime = cb->GetStartTime();
831  }
832  else if (earliestStartTime > cb->GetStartTime())
833  {
834  earliestStartTime = cb->GetStartTime();
835  }
836 
837  if (params.m_GenerateTensorData)
838  {
839  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
840  }
841  if (params.m_DontPrintOutputs)
842  {
843  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
844  }
845 
846  // Print output tensors
847  const auto& infosOut = model.GetOutputBindingInfos();
848  for (size_t i = 0; i < numOutputs; i++)
849  {
850  // We've made sure before that the number of output files either equals numOutputs, in which
851  // case we override those files when processing the results of each iteration (only the
852  // result of the last iteration will be stored), or there are enough
853  // output files for each output of each iteration.
854  size_t outputFileIndex = iteration * numOutputs + i;
855  if (!params.m_OutputTensorFiles.empty())
856  {
857  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
858  ARMNN_LOG(info) << "Writing output " << i << " named: '"
859  << inferenceModelParams.m_OutputBindings[i]
860  << "' of iteration: " << iteration+1 << " to file: '"
861  << params.m_OutputTensorFiles[outputFileIndex] << "'";
862  }
863 
864  const armnn::TensorInfo& infoOut = infosOut[i].second;
865  auto outputTensorFile = params.m_OutputTensorFiles.empty()
866  ? ""
867  : params.m_OutputTensorFiles[outputFileIndex];
868 
869  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
870  infoOut,
871  outputTensorFile,
872  params.m_DequantizeOutput,
873  !params.m_DontPrintOutputs);
874  mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
875  }
876 
877  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
878  ++j;
879  }
880  //print duration difference between overallStartTime and overallEndTime
881  auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
882  auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
883  auto totalInferenceDuration = overallEndTime - overallStartTime;
884  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
885  << std::fixed << totalInferenceDuration.count() << " ms\n";
886  }
887  catch (const armnn::Exception& e)
888  {
889  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
890  return EXIT_FAILURE;
891  }
892  }
893  // Asynchronous execution using std::launch::async
894  else
895  {
896  try
897  {
898  ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
899  std::vector<std::future<std::tuple<unsigned int,
900  std::chrono::duration<double, std::milli>>>> inferenceResults;
901  inferenceResults.reserve(params.m_Iterations);
902 
903  // Create WorkingMemHandles for each inference
904  std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
905  workingMemHandles.reserve(params.m_Iterations);
906  for (unsigned int i = 0; i < params.m_Iterations; ++i)
907  {
908  workingMemHandles.push_back(model.CreateWorkingMemHandle());
909  }
910 
911  // Run each inference in its own thread
912  // start a timer
913  const auto start_time = armnn::GetTimeNow();
914  for (unsigned int i = 0; i < params.m_Iterations; ++i)
915  {
916  armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
917 
918  inferenceResults.push_back(std::async(
919  std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
920  return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
921  }
922  ));
923  }
924 
925  // Check the results
926  for (unsigned int j = 0; j < inferenceResults.size(); ++j)
927  {
928  // Get the results
929  auto inferenceResult = inferenceResults[j].get();
930  auto inferenceDuration = std::get<1>(inferenceResult);
931  auto inferenceID = std::get<0>(inferenceResult);
932 
933  if (params.m_GenerateTensorData)
934  {
935  ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
936  }
937  if (params.m_DontPrintOutputs)
938  {
939  ARMNN_LOG(info) << "Printing outputs to console is disabled.";
940  }
941 
942  // Print output tensors
943  const auto& infosOut = model.GetOutputBindingInfos();
944  for (size_t i = 0; i < numOutputs; i++)
945  {
946  // We've made sure before that the number of output files either equals numOutputs, in which
947  // case we override those files when processing the results of each iteration (only the
948  // result of the last iteration will be stored), or there are enough
949  // output files for each output of each iteration.
950  size_t outputFileIndex = j * numOutputs + i;
951  if (!params.m_OutputTensorFiles.empty())
952  {
953  outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
954  ARMNN_LOG(info) << "Writing output " << i << " named: '"
955  << inferenceModelParams.m_OutputBindings[i]
956  << "' of iteration: " << j+1 << " to file: '"
957  << params.m_OutputTensorFiles[outputFileIndex] << "'";
958  }
959  const armnn::TensorInfo& infoOut = infosOut[i].second;
960  auto outputTensorFile = params.m_OutputTensorFiles.empty()
961  ? ""
962  : params.m_OutputTensorFiles[outputFileIndex];
963 
964  TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
965  infoOut,
966  outputTensorFile,
967  params.m_DequantizeOutput,
968  !params.m_DontPrintOutputs);
969  mapbox::util::apply_visitor(printer, outputs[j][i]);
970  }
971  CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime);
972  ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
973  }
974  // finish timer
975  const auto duration = armnn::GetTimeDuration(start_time);
976  ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
977  << std::fixed << duration.count() << " ms\n";
978  }
979  catch (const armnn::Exception& e)
980  {
981  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
982  return EXIT_FAILURE;
983  }
984  }
985  }
986  catch (const armnn::Exception& e)
987  {
988  ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
989  return EXIT_FAILURE;
990  }
991 
992  return EXIT_SUCCESS;
993 }
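
The std::launch::async branch above follows a plain fan-out/fan-in pattern: launch one future per iteration, each returning an inference id and its duration, then collect the results in submission order. Below is a sketch of just that pattern, assuming <future>, <chrono>, <tuple> and <vector> are available (this file already includes <future>); RunOnce is a hypothetical stand-in for model.RunAsync(workingMemHandle, inputs[i], outputs[i], i).

// Sketch of the fan-out/fan-in pattern used above. RunOnce is a hypothetical stand-in
// for an inference call that reports its id and how long it took.
std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunOnce(unsigned int id)
{
    const auto start = std::chrono::high_resolution_clock::now();
    // ... one inference would run here ...
    const auto stop = std::chrono::high_resolution_clock::now();
    return std::make_tuple(id, std::chrono::duration<double, std::milli>(stop - start));
}

void RunConcurrently(unsigned int iterations)
{
    std::vector<std::future<std::tuple<unsigned int, std::chrono::duration<double, std::milli>>>> results;
    results.reserve(iterations);

    // Fan out: one asynchronous task per iteration.
    for (unsigned int i = 0; i < iterations; ++i)
    {
        results.push_back(std::async(std::launch::async, RunOnce, i));
    }

    // Fan in: block on each future in submission order and read back (id, duration).
    for (auto& result : results)
    {
        auto idAndDuration = result.get();
        auto duration = std::get<1>(idAndDuration);
        // e.g. CheckInferenceTimeThreshold(duration, thresholdTime);
        (void)duration;
    }
}
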
994 
995 // MAIN
996 int main(int argc, const char* argv[])
997 {
998  // Configures logging for both the ARMNN library and this test program.
999  #ifdef NDEBUG
1000  armnn::LogSeverity level = armnn::LogSeverity::Info;
1001  #else
1002  armnn::LogSeverity level = armnn::LogSeverity::Debug;
1003  #endif
1004  armnn::ConfigureLogging(true, true, level);
1005 
1006 
1007  // Get ExecuteNetwork parameters and runtime options from command line
1008  // This might throw an InvalidArgumentException if the user provided invalid inputs
1009  ProgramOptions ProgramOptions;
1010  try {
1011  ProgramOptions.ParseOptions(argc, argv);
1012  } catch (const std::exception &e){
1013  ARMNN_LOG(fatal) << e.what();
1014  return EXIT_FAILURE;
1015  }
1016 
1017  if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
1018  ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut)
1019  && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
1020  {
1021  ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
1022  return EXIT_FAILURE;
1023  }
1024 
1025  std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;
1026 
1027  // Forward to implementation based on the parser type
1028  if (modelFormat.find("armnn") != std::string::npos)
1029  {
1030  #if defined(ARMNN_SERIALIZER)
1031  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1032  return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
1033  #else
1034  ARMNN_LOG(fatal) << "Not built with serialization support.";
1035  return EXIT_FAILURE;
1036  #endif
1037  }
1038  else if (modelFormat.find("onnx") != std::string::npos)
1039  {
1040  #if defined(ARMNN_ONNX_PARSER)
1041  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1042  return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
1043  #else
1044  ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
1045  return EXIT_FAILURE;
1046  #endif
1047  }
1048  else if(modelFormat.find("tflite") != std::string::npos)
1049  {
1050  if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
1051  {
1052  #if defined(ARMNN_TF_LITE_PARSER)
1053  std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
1054  return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
1055  #else
1056  ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
1057  return EXIT_FAILURE;
1058  #endif
1059  }
1060  else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
1061  ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
1062  ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
1063  ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
1064  {
1065  #if defined(ARMNN_TF_LITE_DELEGATE)
1066  return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions);
1067  #else
1068  ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
1069  return EXIT_FAILURE;
1070  #endif
1071  }
1072  }
1073  else
1074  {
1075  ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
1076  << "'. Please include 'armnn', 'tflite' or 'onnx'";
1077  return EXIT_FAILURE;
1078  }
1079 }