ArmNN 22.05 - InferenceModel.hpp
//
// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <armnn/ArmNN.hpp>
#include <armnn/Threadpool.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnnUtils/TContainer.hpp>

#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <Filesystem.hpp>
#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <cxxopts/cxxopts.hpp>
#include "CxxoptsUtils.hpp"
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace
{

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace
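// Illustrative usage sketch for the helper above (for example only; the backend IDs and variable
// names are placeholder assumptions): collect the names of any unrecognised backends into a string.
//
//     std::string invalidBackends;
//     std::vector<armnn::BackendId> requested = { "CpuAcc", "CpuRef" };
//     if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
//     {
//         ARMNN_LOG(warning) << "Unrecognised backend IDs: " << invalidBackends;
//     }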

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float, int32_t>;

struct Params
{
    std::string                     m_ModelPath;
    std::vector<std::string>        m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string>        m_OutputBindings;
    std::vector<armnn::BackendId>   m_ComputeDevices;
    std::string                     m_DynamicBackendsPath;
    size_t                          m_SubgraphId;
    bool                            m_AllowExpandedDims;
    bool                            m_IsModelBinary;
    bool                            m_VisualizePostOptimizationModel;
    bool                            m_EnableFp16TurboMode;
    bool                            m_EnableBf16TurboMode;
    bool                            m_PrintIntermediateLayers;
    bool                            m_ParseUnsupported;
    bool                            m_InferOutputShape;
    bool                            m_EnableFastMath;
    bool                            m_SaveCachedNetwork;
    bool                            m_OutputDetailsToStdOut;
    bool                            m_OutputDetailsOnlyToStdOut;
    std::string                     m_CachedNetworkFilePath;
    unsigned int                    m_NumberOfThreads;
    std::string                     m_MLGOTuningFilePath;
    bool                            m_AsyncEnabled;
    size_t                          m_ThreadPoolSize;
    bool                            m_ImportInputsIfAligned;

    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_AllowExpandedDims(false)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_ParseUnsupported(false)
        , m_InferOutputShape(false)
        , m_EnableFastMath(false)
        , m_SaveCachedNetwork(false)
        , m_OutputDetailsToStdOut(false)
        , m_OutputDetailsOnlyToStdOut(false)
        , m_CachedNetworkFilePath("")
        , m_NumberOfThreads(0)
        , m_MLGOTuningFilePath("")
        , m_AsyncEnabled(false)
        , m_ThreadPoolSize(0)
        , m_ImportInputsIfAligned(false)
    {}
};

} // namespace InferenceModelInternal

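// Illustrative sketch (for example only; the model path and binding names are placeholder
// assumptions): populating the Params struct before building a network or an InferenceModel.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     params.m_ComputeDevices = { "CpuAcc", "CpuRef" };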
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                               params.m_ModelPath,
                                                               errorCode.message(),
                                                               CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_AllowExpandedDims          = params.m_AllowExpandedDims;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }
        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif
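// Sketch of how the specializations above are selected (illustrative only; variable names are
// placeholders): the IParser template argument picks the matching CreateNetworkImpl, e.g. with the
// TF Lite parser built in:
//
//     std::vector<armnn::BindingPointInfo> inputs, outputs;
//     armnn::INetworkPtr net =
//         CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(params, inputs, outputs);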

template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions, std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
            ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
             cxxopts::value<std::string>(cLineOptions.m_ModelDir))
            ("c,compute", backendsMessage.c_str(),
             cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
            ("b,dynamic-backends-path",
             "Path where to load any available dynamic backend from. "
             "If left empty (the default), dynamic backends will not be used.",
             cxxopts::value(cLineOptions.m_DynamicBackendsPath))
            ("l,labels",
             "Text file containing one image filename - correct label pair per line, "
             "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
            ("v,visualize-optimized-model",
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.",
             cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
            ("fp16-turbo-mode",
             "If this option is enabled FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
            ("bf16-turbo-mode",
             "If this option is enabled FP32 layers, weights and biases will be converted "
             "to BF16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
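    // Hedged example (illustrative only; the program name and parser choice are placeholder
    // assumptions): one way AddCommandLineOptions might be wired into a cxxopts parse in a driver.
    //
    //     cxxopts::Options options("ExampleProgram", "ArmNN inference example");
    //     InferenceModel<armnnTfLiteParser::ITfLiteParser, float>::CommandLineOptions cLineOptions;
    //     std::vector<std::string> required;
    //     InferenceModel<armnnTfLiteParser::ITfLiteParser, float>::AddCommandLineOptions(
    //         options, cLineOptions, required);
    //     auto result = options.parse(argc, argv);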

    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath),
          m_ImportInputsIfAligned(params.m_ImportInputsIfAligned)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Configure the Profiler if the profiling details are opted for
        if (params.m_OutputDetailsOnlyToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
        else if (params.m_OutputDetailsToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            const auto parsing_start_time = armnn::GetTimeNow();
            armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
            options.m_Debug = params.m_PrintIntermediateLayers;
            options.m_shapeInferenceMethod = params.m_InferOutputShape ?
                    armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly;
            options.m_ProfilingEnabled = m_EnableProfiling;

            armnn::BackendOptions gpuAcc("GpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "SaveCachedNetwork", params.m_SaveCachedNetwork },
                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
            });

            armnn::BackendOptions cpuAcc("CpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "NumberOfThreads", params.m_NumberOfThreads }
            });
            options.m_ModelOptions.push_back(gpuAcc);
            options.m_ModelOptions.push_back(cpuAcc);

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";

            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }

                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }
        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return duration;
        }
    }

    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in Execute (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        else
        {
            return std::make_tuple(inferenceID, duration);
        }
    }

    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }

    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
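
// End-to-end usage sketch (illustrative only; it assumes the TF Lite parser is built in and
// continues from a populated Params such as the sketch earlier in this file): construct the model,
// size the input/output containers from the bindings, and run a synchronous inference.
//
//     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
//     TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");
//
//     std::vector<float> inputData(model.GetInputSize());
//     std::vector<float> outputData(model.GetOutputSize());
//     std::vector<armnnUtils::TContainer> inputs  = { inputData };
//     std::vector<armnnUtils::TContainer> outputs = { outputData };
//
//     auto inferenceTimeMs = model.Run(inputs, outputs);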