ArmNN 22.08: InferenceModel.hpp
//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once


#include <armnn/ArmNN.hpp>
#include <armnn/Threadpool.hpp>
#include <armnn/Logging.hpp>
#include <armnn/utility/Timer.hpp>
#include <armnn/BackendRegistry.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/NumericCast.hpp>

#include <armnnUtils/TContainer.hpp>

#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <Filesystem.hpp>
#include <HeapProfiling.hpp>
#include <TensorIOUtils.hpp>

#include <cxxopts/cxxopts.hpp>
#include "CxxoptsUtils.hpp"
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <algorithm>
#include <iterator>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <type_traits>

namespace InferenceModelInternal
{
using BindingPointInfo = armnn::BindingPointInfo;

using QuantizationParams = std::pair<float,int32_t>;

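// Params collects everything needed to parse, optimize and load one model:
// the model file, input/output binding names and shapes, the preferred
// backends, and the optimizer/runtime switches used by InferenceModel below.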
struct Params
{
    std::string m_ModelPath;
    std::vector<std::string> m_InputBindings;
    std::vector<armnn::TensorShape> m_InputShapes;
    std::vector<std::string> m_OutputBindings;
    std::vector<armnn::BackendId> m_ComputeDevices;
    std::string m_DynamicBackendsPath;
    size_t m_SubgraphId;
    bool m_AllowExpandedDims;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;
    bool m_EnableBf16TurboMode;
    bool m_PrintIntermediateLayers;
    bool m_ParseUnsupported;
    bool m_InferOutputShape;
    bool m_EnableFastMath;
    bool m_SaveCachedNetwork;
    bool m_OutputDetailsToStdOut;
    bool m_OutputDetailsOnlyToStdOut;
    std::string m_CachedNetworkFilePath;
    unsigned int m_NumberOfThreads;
    std::string m_MLGOTuningFilePath;
    bool m_AsyncEnabled;
    size_t m_ThreadPoolSize;
    bool m_ImportInputsIfAligned;


    Params()
        : m_ComputeDevices{}
        , m_SubgraphId(0)
        , m_AllowExpandedDims(false)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
        , m_EnableBf16TurboMode(false)
        , m_PrintIntermediateLayers(false)
        , m_ParseUnsupported(false)
        , m_InferOutputShape(false)
        , m_EnableFastMath(false)
        , m_SaveCachedNetwork(false)
        , m_OutputDetailsToStdOut(false)
        , m_OutputDetailsOnlyToStdOut(false)
        , m_CachedNetworkFilePath("")
        , m_NumberOfThreads(0)
        , m_MLGOTuningFilePath("")
        , m_AsyncEnabled(false)
        , m_ThreadPoolSize(0)
        , m_ImportInputsIfAligned(false)
    {}
};

} // namespace InferenceModelInternal

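// Primary template: parses the model with the given IParser and fills in the
// input/output binding info reported by the parser. The specialisations that
// follow adapt this flow to the deserializer, TfLite and ONNX front ends.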
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};

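// Specialisation for networks previously serialized with the ArmNN serializer.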
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
                                                                params.m_ModelPath,
                                                                errorCode.message(),
                                                                CHECK_LOCATION().AsString()));
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);

            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif

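// Specialisation for TensorFlow Lite models loaded through armnnTfLiteParser.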
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        IParser::TfLiteParserOptions options;
        options.m_AllowExpandedDims          = params.m_AllowExpandedDims;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif

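// Specialisation for ONNX models loaded through armnnOnnxParser.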
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params  = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }

        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif


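// Wraps the full lifecycle of a network: parse (via CreateNetworkImpl<IParser>),
// optimize, load into the runtime, and run inferences synchronously or
// asynchronously. TDataType is the element type exposed to callers as DataType.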
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;


    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions, std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
            ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
             cxxopts::value<std::string>(cLineOptions.m_ModelDir))
            ("c,compute", backendsMessage.c_str(),
             cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
            ("b,dynamic-backends-path",
             "Path where to load any available dynamic backend from. "
             "If left empty (the default), dynamic backends will not be used.",
             cxxopts::value(cLineOptions.m_DynamicBackendsPath))
            ("l,labels",
             "Text file containing one image filename - correct label pair per line, "
             "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
            ("v,visualize-optimized-model",
             "Produce a dot file useful for visualizing the graph post optimization. "
             "The file will have the same name as the model with the .dot extension.",
             cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
            ("fp16-turbo-mode",
             "If this option is enabled FP32 layers, weights and biases will be converted "
             "to FP16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
            ("bf16-turbo-mode",
             "If this option is enabled FP32 layers, weights and biases will be converted "
             "to BF16 where the backend supports it.",
             cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }

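    // Parses the model, optimizes it for the requested backends and loads it
    // into the runtime. An existing runtime can be shared via the last argument;
    // otherwise a private one is created. Throws armnn::Exception on failure.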
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath),
          m_ImportInputsIfAligned(params.m_ImportInputsIfAligned)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Configure the Profiler if the profiling details are opted for
        if (params.m_OutputDetailsOnlyToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
        else if (params.m_OutputDetailsToStdOut)
            m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            const auto parsing_start_time = armnn::GetTimeNow();
            armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

            ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
            options.m_Debug = params.m_PrintIntermediateLayers;
            options.m_shapeInferenceMethod = params.m_InferOutputShape ?
                armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly;
            options.m_ProfilingEnabled = m_EnableProfiling;

            armnn::BackendOptions gpuAcc("GpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "SaveCachedNetwork", params.m_SaveCachedNetwork },
                { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
                { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
            });

            armnn::BackendOptions cpuAcc("CpuAcc",
            {
                { "FastMathEnabled", params.m_EnableFastMath },
                { "NumberOfThreads", params.m_NumberOfThreads }
            });
            options.m_ModelOptions.push_back(gpuAcc);
            options.m_ModelOptions.push_back(cpuAcc);

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }


        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";

            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }

                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }

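    // Runs one synchronous inference. Each output container must already be
    // large enough for the corresponding binding; returns the wall-clock time
    // spent in EnqueueWorkload.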
    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }
        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        else
        {
            return duration;
        }
    }

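    // Runs one inference against a caller-provided working memory handle so that
    // several inferences can execute concurrently on the same loaded network.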
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start timer to record inference time in EnqueueWorkload (in milliseconds)
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        else
        {
            return std::make_tuple(inferenceID, duration);
        }
    }

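    // Schedules an inference on the internal thread pool (created when
    // m_AsyncEnabled and m_ThreadPoolSize are set in Params); the callback is
    // notified on completion.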
    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // if profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }

716 
717  const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
718  {
719  CheckInputIndexIsValid(inputIndex);
720  return m_InputBindings[inputIndex];
721  }
722 
723  const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
724  {
725  return m_InputBindings;
726  }
727 
728  const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
729  {
730  CheckOutputIndexIsValid(outputIndex);
731  return m_OutputBindings[outputIndex];
732  }
733 
734  const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
735  {
736  return m_OutputBindings;
737  }
738 
739  QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
740  {
741  CheckOutputIndexIsValid(outputIndex);
742  return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
743  m_OutputBindings[outputIndex].second.GetQuantizationOffset());
744  }
745 
746  QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
747  {
748  CheckInputIndexIsValid(inputIndex);
749  return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
750  m_InputBindings[inputIndex].second.GetQuantizationOffset());
751  }
752 
753  std::vector<QuantizationParams> GetAllQuantizationParams() const
754  {
755  std::vector<QuantizationParams> quantizationParams;
756  for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
757  {
758  quantizationParams.push_back(GetQuantizationParams(i));
759  }
760  return quantizationParams;
761  }
762 
763  std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
764  {
765  return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
766  }
767 
private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};
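
A minimal usage sketch, not part of the header: it assumes a build with ARMNN_TF_LITE_PARSER enabled, a placeholder model path and tensor names, and float input/output buffers sized from the reported bindings. Error handling is left to the exceptions thrown by the class above.

    // Sketch only: "model.tflite", "input" and "output" are placeholders.
    InferenceModelInternal::Params params;
    params.m_ModelPath      = "model.tflite";
    params.m_InputBindings  = { "input" };
    params.m_OutputBindings = { "output" };
    params.m_ComputeDevices = { "CpuAcc", "CpuRef" };

    InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params,
                                                                  false, // enableProfiling
                                                                  "");   // dynamicBackendsPath

    std::vector<float> inputData(model.GetInputSize());   // fill with real input data
    std::vector<float> outputData(model.GetOutputSize());

    std::vector<armnnUtils::TContainer> inputs  = { inputData };
    std::vector<armnnUtils::TContainer> outputs = { outputData };

    auto duration = model.Run(inputs, outputs);            // results are written into outputs[0]
    ARMNN_LOG(info) << "Inference took " << duration.count() << " ms";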