ArmNN 21.08
ExecuteNetwork.cpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
#include "ExecuteNetworkProgramOptions.hpp"
#include <armnn/IAsyncExecutionCallback.hpp>
#include <AsyncExecutionCallback.hpp>

#include <armnn/Logging.hpp>
#include <armnnUtils/Filesystem.hpp>
#include <InferenceTest.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif
#if defined(ARMNN_TFLITE_DELEGATE)
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>

#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#endif

#include <future>
#if defined(ARMNN_TFLITE_DELEGATE)
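// Runs the network through the TfLite runtime. When the Arm NN delegate executor was
// requested on the command line, the delegate is attached so that supported operators
// run through Arm NN on the chosen compute devices; otherwise the stock TfLite kernels
// are used.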
int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params,
                           const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
    using namespace tflite;

    std::unique_ptr<tflite::FlatBufferModel> model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());

    auto tfLiteInterpreter = std::make_unique<Interpreter>();
    tflite::ops::builtin::BuiltinOpResolver resolver;

    tflite::InterpreterBuilder builder(*model, resolver);
    builder(&tfLiteInterpreter);
    tfLiteInterpreter->AllocateTensors();

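    // The Arm NN delegate is only registered when it was explicitly requested;
    // otherwise the model runs on the unmodified TfLite interpreter.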
    int status = 0;
    if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
    {
        // Create the Armnn Delegate
        armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices);
        std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);
        // Register armnn_delegate to TfLiteInterpreter
        status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
        if (status == kTfLiteError)
        {
            ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
            return EXIT_FAILURE;
        }
    }
    else
    {
        std::cout << "Running on TfLite without ArmNN delegate\n";
    }


    std::vector<std::string> inputBindings;
    for (const std::string& inputName: params.m_InputNames)
    {
        inputBindings.push_back(inputName);
    }

    armnn::Optional<std::string> dataFile = params.m_GenerateTensorData
                                            ? armnn::EmptyOptional()
                                            : armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths[0]);

    const size_t numInputs = inputBindings.size();

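    // Fill every input tensor, either with values parsed from the supplied data file
    // or, when tensor-data generation was requested, with generated dummy data. The
    // text values are converted to the declared data type of each input.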
    for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
    {
        int input = tfLiteInterpreter->inputs()[inputIndex];
        TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;

        long inputSize = 1;
        for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
        {
            inputSize *= inputDims->data[dim];
        }

        if (params.m_InputTypes[inputIndex].compare("float") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<float>(input);

            if(inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<float> tensorData;
            PopulateTensorWithDataGeneric<float>(tensorData,
                                                 params.m_InputTensorShapes[inputIndex]->GetNumElements(),
                                                 dataFile,
                                                 [](const std::string& s)
                                                 { return std::stof(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if(inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  params.m_InputTensorShapes[inputIndex]->GetNumElements(),
                                                  dataFile,
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("int") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);

            if(inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int32_t> tensorData;
            PopulateTensorWithDataGeneric<int32_t>(tensorData,
                                                   params.m_InputTensorShapes[inputIndex]->GetNumElements(),
                                                   dataFile,
                                                   [](const std::string& s)
                                                   { return std::stoi(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
                 params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);

            if(inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<uint8_t> tensorData;
            PopulateTensorWithDataGeneric<uint8_t>(tensorData,
                                                   params.m_InputTensorShapes[inputIndex]->GetNumElements(),
                                                   dataFile,
                                                   [](const std::string& s)
                                                   { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if(inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  params.m_InputTensorShapes[inputIndex]->GetNumElements(),
                                                  dataFile,
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else
        {
            ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". ";
            return EXIT_FAILURE;
        }
    }

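    // Unlike the parser-based MainImpl path below, this path only prints the outputs
    // to stdout; it does not write them to output tensor files.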
    for (size_t x = 0; x < params.m_Iterations; x++)
    {
        // Run the inference
        status = tfLiteInterpreter->Invoke();

        // Print out the output
        for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex)
        {
            auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
            TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;

            long outputSize = 1;
            for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
            {
                outputSize *= outputDims->data[dim];
            }

            std::cout << params.m_OutputNames[outputIndex] << ": ";
            if (params.m_OutputTypes[outputIndex].compare("float") == 0)
            {
                auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
                if(tfLiteDelegateOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    printf("%f ", tfLiteDelegateOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("int") == 0)
            {
                auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
                if(tfLiteDelegateOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    printf("%d ", tfLiteDelegateOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("qsymms8") == 0)
            {
                auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
                if(tfLiteDelegateOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    printf("%d ", tfLiteDelegateOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("qasymm8") == 0 ||
                     params.m_OutputTypes[outputIndex].compare("qasymmu8") == 0)
            {
                auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
                if(tfLiteDelegateOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    printf("%u ", tfLiteDelegateOutputData[i]);
                }
            }
            else
            {
                ARMNN_LOG(fatal) << "Unsupported output tensor data type \"" << params.m_OutputTypes[outputIndex] <<
                                    "\". The output type can be specified with the -z argument.";
                return EXIT_FAILURE;
            }
            std::cout << std::endl;
        }
    }

    return status;
}
#endif
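// Parses the model with the given TParser, loads it into an Arm NN IRuntime and runs
// params.m_Iterations inferences, either synchronously or asynchronously depending on
// the command-line options.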
template<typename TParser, typename TDataType>
int MainImpl(const ExecuteNetworkParams& params,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
    using namespace std::chrono;

    std::vector<std::vector<TContainer>> inputs;
    std::vector<std::vector<TContainer>> outputs;

    try
    {
        // Creates an InferenceModel, which will parse the model and load it into an IRuntime.
        typename InferenceModel<TParser, TDataType>::Params inferenceModelParams;
        inferenceModelParams.m_ModelPath                      = params.m_ModelPath;
        inferenceModelParams.m_IsModelBinary                  = params.m_IsModelBinary;
        inferenceModelParams.m_ComputeDevices                 = params.m_ComputeDevices;
        inferenceModelParams.m_DynamicBackendsPath            = params.m_DynamicBackendsPath;
        inferenceModelParams.m_PrintIntermediateLayers        = params.m_PrintIntermediate;
        inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails;
        inferenceModelParams.m_ParseUnsupported               = params.m_ParseUnsupported;
        inferenceModelParams.m_InferOutputShape               = params.m_InferOutputShape;
        inferenceModelParams.m_EnableFastMath                 = params.m_EnableFastMath;
        inferenceModelParams.m_SaveCachedNetwork              = params.m_SaveCachedNetwork;
        inferenceModelParams.m_CachedNetworkFilePath          = params.m_CachedNetworkFilePath;
        inferenceModelParams.m_NumberOfThreads                = params.m_NumberOfThreads;
        inferenceModelParams.m_MLGOTuningFilePath             = params.m_MLGOTuningFilePath;
        inferenceModelParams.m_AsyncEnabled                   = params.m_Concurrent;
        inferenceModelParams.m_ThreadPoolSize                 = params.m_ThreadPoolSize;
        inferenceModelParams.m_OutputDetailsToStdOut          = params.m_OutputDetailsToStdOut;

        for(const std::string& inputName: params.m_InputNames)
        {
            inferenceModelParams.m_InputBindings.push_back(inputName);
        }

        for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i)
        {
            inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]);
        }

        for(const std::string& outputName: params.m_OutputNames)
        {
            inferenceModelParams.m_OutputBindings.push_back(outputName);
        }

        inferenceModelParams.m_SubgraphId          = params.m_SubgraphId;
        inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode;
        inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode;

        InferenceModel<TParser, TDataType> model(inferenceModelParams,
                                                 params.m_EnableProfiling,
                                                 params.m_DynamicBackendsPath,
                                                 runtime);

        const size_t numInputs = inferenceModelParams.m_InputBindings.size();

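        // Input quantization parameters are only attached when the user asked for the
        // (float) input data to be quantized before it is fed to the network.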
        armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput ?
                                                      armnn::MakeOptional<QuantizationParams>(
                                                          model.GetInputQuantizationParams()) :
                                                      armnn::EmptyOptional();

        if (params.m_InputTensorDataFilePaths.size() > numInputs)
        {
            ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
                            << "for each input. The user provided "
                            << params.m_InputTensorDataFilePaths.size()
                            << " input-tensor-data file/s which will be used to fill the input/s.\n";
        }

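        // Prepare one vector of input containers per iteration up front; the actual
        // execution happens further down once all inputs and outputs are in place.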
        for(unsigned int j = 0; j < params.m_Iterations ; ++j)
        {
            std::vector<TContainer> inputDataContainers;
            for(unsigned int i = 0; i < numInputs; ++i)
            {
                // If there are fewer input files given than required for the execution of
                // params.m_Iterations we simply start with the first input file again
                size_t inputFileIndex = j * numInputs + i;
                if (!params.m_InputTensorDataFilePaths.empty())
                {
                    inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size();
                }

                armnn::Optional<std::string> dataFile = params.m_GenerateTensorData ?
                                                        armnn::EmptyOptional() :
                                                        armnn::MakeOptional<std::string>(
                                                            params.m_InputTensorDataFilePaths.at(inputFileIndex));

                unsigned int numElements = model.GetInputSize(i);
                if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i])
                {
                    // If the user has provided a tensor shape for the current input,
                    // override numElements
                    numElements = params.m_InputTensorShapes[i]->GetNumElements();
                }

                TContainer tensorData;
                PopulateTensorWithData(tensorData,
                                       numElements,
                                       params.m_InputTypes[i],
                                       qParams,
                                       dataFile);

                inputDataContainers.push_back(tensorData);
            }
            inputs.push_back(inputDataContainers);
        }

        const size_t numOutputs = inferenceModelParams.m_OutputBindings.size();

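        // Pre-allocate one output container per output tensor for every iteration,
        // sized from the model and typed according to the requested output types.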
        for (unsigned int j = 0; j < params.m_Iterations; ++j)
        {
            std::vector<TContainer> outputDataContainers;
            for (unsigned int i = 0; i < numOutputs; ++i)
            {
                if (params.m_OutputTypes[i].compare("float") == 0)
                {
                    outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
                }
                else if (params.m_OutputTypes[i].compare("int") == 0)
                {
                    outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
                }
                else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
                         params.m_OutputTypes[i].compare("qasymmu8") == 0)
                {
                    outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
                }
                else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
                {
                    outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
                }
                else
                {
                    ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". ";
                    return EXIT_FAILURE;
                }
            }
            outputs.push_back(outputDataContainers);
        }

        if (params.m_Iterations > 1)
        {
            std::stringstream msg;
            msg << "Network will be executed " << params.m_Iterations;
            if (params.m_Concurrent)
            {
                msg << " times in an asynchronous manner. ";
            }
            else
            {
                msg << " times successively. ";
            }
            msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
                   "cover each execution.";
            ARMNN_LOG(info) << msg.str();
        }

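        // Three execution modes follow: a plain loop over model.Run() when concurrency
        // was not requested, the Arm NN thread pool when a thread pool size was given,
        // and one std::async task per inference otherwise.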
        // Synchronous execution
        if (!params.m_Concurrent)
        {
            for (size_t x = 0; x < params.m_Iterations; x++)
            {
                // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds)
                auto inference_duration = model.Run(inputs[x], outputs[x]);

                if (params.m_GenerateTensorData)
                {
                    ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
                }

                // Print output tensors
                const auto& infosOut = model.GetOutputBindingInfos();
                for (size_t i = 0; i < numOutputs; i++)
                {
                    const armnn::TensorInfo& infoOut = infosOut[i].second;

                    // We've made sure before that the number of output files either equals numOutputs, in which case
                    // we overwrite those files when processing the results of each iteration (only the result of the
                    // last iteration will be stored), or there are enough output files for each output of each
                    // iteration.
                    size_t outputFileIndex = x * numOutputs + i;
                    if (!params.m_OutputTensorFiles.empty())
                    {
                        outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                        ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                        << inferenceModelParams.m_OutputBindings[i]
                                        << "' of iteration: " << x+1 << " to file: '"
                                        << params.m_OutputTensorFiles[outputFileIndex] << "'";
                    }
                    auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                            ? ""
                                            : params.m_OutputTensorFiles[outputFileIndex];

                    TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                          infoOut,
                                          outputTensorFile,
                                          params.m_DequantizeOutput);
                    mapbox::util::apply_visitor(printer, outputs[x][i]);
                }

                ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                                << std::fixed << inference_duration.count() << " ms\n";

                // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
                if (params.m_ThresholdTime != 0.0)
                {
                    ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                    << std::fixed << params.m_ThresholdTime << " ms";
                    auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
                    ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                    << std::fixed << thresholdMinusInference << " ms" << "\n";

                    if (thresholdMinusInference < 0)
                    {
                        std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
                        ARMNN_LOG(fatal) << errorMessage;
                    }
                }
            }
        }
        // Asynchronous execution using the Arm NN thread pool
        else if (params.m_ThreadPoolSize >= 1)
        {
            try
            {
                ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
                armnn::AsyncCallbackManager callbackManager;
                std::unordered_map<armnn::InferenceId, std::vector<TContainer>&> inferenceOutputMap;

                // Declare the latest and earliest inference times here to be used when calculating overall time
                std::chrono::high_resolution_clock::time_point earliestStartTime;
                std::chrono::high_resolution_clock::time_point latestEndTime =
                    std::chrono::high_resolution_clock::now();

                // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the
                // LoadedNetwork with each scheduled inference having a specific priority
                for (size_t i = 0; i < params.m_Iterations; ++i)
                {
                    std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
                    inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
                    model.RunAsync(inputs[i], outputs[i], cb);
                }

                // Check the results
                unsigned int j = 0;
                for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
                {
                    auto cb = callbackManager.GetNotifiedCallback();

                    // Get the results
                    auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
                    auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
                    auto inferenceDuration = endTime - startTime;

                    if (latestEndTime < cb->GetEndTime())
                    {
                        latestEndTime = cb->GetEndTime();
                    }

                    if (earliestStartTime.time_since_epoch().count() == 0)
                    {
                        earliestStartTime = cb->GetStartTime();
                    }
                    else if (earliestStartTime > cb->GetStartTime())
                    {
                        earliestStartTime = cb->GetStartTime();
                    }

                    if (params.m_GenerateTensorData)
                    {
                        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
                    }

                    // Print output tensors
                    const auto& infosOut = model.GetOutputBindingInfos();
                    for (size_t i = 0; i < numOutputs; i++)
                    {
                        // We've made sure before that the number of output files either equals numOutputs, in which
                        // case we overwrite those files when processing the results of each iteration (only the result
                        // of the last iteration will be stored), or there are enough output files for each output of
                        // each iteration.
                        size_t outputFileIndex = iteration * numOutputs + i;
                        if (!params.m_OutputTensorFiles.empty())
                        {
                            outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                            ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                            << inferenceModelParams.m_OutputBindings[i]
                                            << "' of iteration: " << iteration+1 << " to file: '"
                                            << params.m_OutputTensorFiles[outputFileIndex] << "'";
                        }

                        const armnn::TensorInfo& infoOut = infosOut[i].second;
                        auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                                ? ""
                                                : params.m_OutputTensorFiles[outputFileIndex];

                        TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                              infoOut,
                                              outputTensorFile,
                                              params.m_DequantizeOutput);
                        mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
                    }

                    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                                    << std::fixed << inferenceDuration.count() << " ms\n";

                    // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
                    if (params.m_ThresholdTime != 0.0)
                    {
                        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                        << std::fixed << params.m_ThresholdTime << " ms";
                        auto thresholdMinusInference =
                            params.m_ThresholdTime - duration<double, std::milli>(inferenceDuration).count();
                        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                        << std::fixed << thresholdMinusInference << " ms" << "\n";

                        if (thresholdMinusInference < 0)
                        {
                            ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
                        }
                    }
                    ++j;
                }
                // Print the duration difference between the earliest start time and the latest end time
                auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
                auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
                auto totalInferenceDuration = overallEndTime - overallStartTime;
                ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                                << std::fixed << totalInferenceDuration.count() << " ms\n";
            }
            catch (const armnn::Exception& e)
            {
                ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
                return EXIT_FAILURE;
            }
        }
        // Asynchronous execution using std::launch::async
        else
        {
            try
            {
                ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
                std::vector<std::future<std::tuple<unsigned int,
                    std::chrono::duration<double, std::milli>>>> inferenceResults;
                inferenceResults.reserve(params.m_Iterations);

                // Create WorkingMemHandles for each inference
                std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
                workingMemHandles.reserve(params.m_Iterations);
                for (unsigned int i = 0; i < params.m_Iterations; ++i)
                {
                    workingMemHandles.push_back(model.CreateWorkingMemHandle());
                }

                // Run each inference in its own thread
                // start a timer
                const auto start_time = armnn::GetTimeNow();
                for (unsigned int i = 0; i < params.m_Iterations; ++i)
                {
                    armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();

                    inferenceResults.push_back(std::async(
                        std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
                            return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
                        }
                    ));
                }

                // Check the results
                for (unsigned int j = 0; j < inferenceResults.size(); ++j)
                {
                    // Get the results
                    auto inferenceResult = inferenceResults[j].get();
                    auto inferenceDuration = std::get<1>(inferenceResult);
                    auto inferenceID = std::get<0>(inferenceResult);

                    if (params.m_GenerateTensorData)
                    {
                        ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
                    }

                    // Print output tensors
                    const auto& infosOut = model.GetOutputBindingInfos();
                    for (size_t i = 0; i < numOutputs; i++)
                    {
                        // We've made sure before that the number of output files either equals numOutputs, in which
                        // case we overwrite those files when processing the results of each iteration (only the result
                        // of the last iteration will be stored), or there are enough output files for each output of
                        // each iteration.
                        size_t outputFileIndex = j * numOutputs + i;
                        if (!params.m_OutputTensorFiles.empty())
                        {
                            outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                            ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                            << inferenceModelParams.m_OutputBindings[i]
                                            << "' of iteration: " << j+1 << " to file: '"
                                            << params.m_OutputTensorFiles[outputFileIndex] << "'";
                        }
                        const armnn::TensorInfo& infoOut = infosOut[i].second;
                        auto outputTensorFile = params.m_OutputTensorFiles.empty()
                                                ? ""
                                                : params.m_OutputTensorFiles[outputFileIndex];

                        TensorPrinter printer(inferenceModelParams.m_OutputBindings[i],
                                              infoOut,
                                              outputTensorFile,
                                              params.m_DequantizeOutput);
                        mapbox::util::apply_visitor(printer, outputs[j][i]);
                    }

                    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                                    << std::fixed << inferenceDuration.count() << " ms\n";

                    // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
                    if (params.m_ThresholdTime != 0.0)
                    {
                        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                        << std::fixed << params.m_ThresholdTime << " ms";
                        auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count();
                        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                        << std::fixed << thresholdMinusInference << " ms" << "\n";

                        if (thresholdMinusInference < 0)
                        {
                            ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
                        }
                    }
                    ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
                }
                // Stop the timer and report the overall wall-clock time
                const auto duration = armnn::GetTimeDuration(start_time);
                ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                                << std::fixed << duration.count() << " ms\n";
            }
            catch (const armnn::Exception& e)
            {
                ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
                return EXIT_FAILURE;
            }
        }
    }
    catch (const armnn::Exception& e)
    {
        ARMNN_LOG(fatal) << "Armnn Error: " << e.what();
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}


// MAIN
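// Entry point: configures logging, parses the command line, creates the Arm NN runtime
// and dispatches to the parser-based or delegate-based implementation that matches the
// model format.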
int main(int argc, const char* argv[])
{
    // Configures logging for both the ARMNN library and this test program.
    #ifdef NDEBUG
    armnn::LogSeverity level = armnn::LogSeverity::Info;
    #else
    armnn::LogSeverity level = armnn::LogSeverity::Debug;
    #endif
    armnn::ConfigureLogging(true, true, level);


    // Get ExecuteNetwork parameters and runtime options from command line
    // This might throw an InvalidArgumentException if the user provided invalid inputs
    ProgramOptions ProgramOptions;
    try {
        ProgramOptions.ParseOptions(argc, argv);
    } catch (const std::exception &e){
        ARMNN_LOG(fatal) << e.what();
        return EXIT_FAILURE;
    }

    if (ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut && !ProgramOptions.m_ExNetParams.m_EnableProfiling)
    {
        ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
        return EXIT_FAILURE;
    }

    // Create runtime
    std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));

    std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;

    // Forward to implementation based on the parser type
    if (modelFormat.find("armnn") != std::string::npos)
    {
        #if defined(ARMNN_SERIALIZER)
        return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
        #else
        ARMNN_LOG(fatal) << "Not built with serialization support.";
        return EXIT_FAILURE;
        #endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
        #if defined(ARMNN_ONNX_PARSER)
        return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
        #else
        ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
        return EXIT_FAILURE;
        #endif
    }
    else if(modelFormat.find("tflite") != std::string::npos)
    {
        if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
        {
            #if defined(ARMNN_TF_LITE_PARSER)
            return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
            #else
            ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
            return EXIT_FAILURE;
            #endif
        }
        else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
                     ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate ||
                 ProgramOptions.m_ExNetParams.m_TfLiteExecutor ==
                     ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter)
        {
            #if defined(ARMNN_TFLITE_DELEGATE)
            return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime);
            #else
            ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
            return EXIT_FAILURE;
            #endif
        }
    }
    else
    {
        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
                         << "'. Please include 'tflite' or 'onnx'";
        return EXIT_FAILURE;
    }
}