ArmNN 21.11
InferenceModel.hpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 
9 #include <armnn/ArmNN.hpp>
10 #include <armnn/Threadpool.hpp>
11 #include <armnn/Logging.hpp>
12 #include <armnn/utility/Timer.hpp>
13 #include <armnn/BackendRegistry.hpp>
14 #include <armnn/utility/Assert.hpp>
15 #include <armnn/utility/NumericCast.hpp>
16 
17 #include <armnnUtils/TContainer.hpp>
18 
19 #include <common/include/ProfilingGuid.hpp>
20 
21 #if defined(ARMNN_SERIALIZER)
22 #include <armnnDeserializer/IDeserializer.hpp>
23 #endif
24 #if defined(ARMNN_TF_LITE_PARSER)
25 #include <armnnTfLiteParser/ITfLiteParser.hpp>
26 #endif
27 #if defined(ARMNN_ONNX_PARSER)
28 #include <armnnOnnxParser/IOnnxParser.hpp>
29 #endif
30 
31 #include <armnnUtils/Filesystem.hpp>
32 #include <HeapProfiling.hpp>
33 #include <TensorIOUtils.hpp>
34 
36 #include <cxxopts/cxxopts.hpp>
37 #include "CxxoptsUtils.hpp"
38 #include <fmt/format.h>
39 #include <mapbox/variant.hpp>
40 
41 #include <algorithm>
42 #include <iterator>
43 #include <fstream>
44 #include <map>
45 #include <string>
46 #include <vector>
47 #include <type_traits>
48 
49 namespace
50 {
51 
52 inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
53                                            armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
54 {
55  if (backendIds.empty())
56  {
57  return false;
58  }
59 
60  armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();
61 
62  bool allValid = true;
63  for (const auto& backendId : backendIds)
64  {
65  if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
66  {
67  allValid = false;
68  if (invalidBackendIds)
69  {
70  if (!invalidBackendIds.value().empty())
71  {
72  invalidBackendIds.value() += ", ";
73  }
74  invalidBackendIds.value() += backendId;
75  }
76  }
77  }
78  return allValid;
79 }
80 
81 } // anonymous namespace
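A minimal sketch of how the helper above is typically called (it mirrors the check done later in the InferenceModel constructor; the backend IDs shown are illustrative):

    // Illustrative only; "GpuAcc" and "CpuRef" are example backend IDs.
    std::string invalidBackends;
    std::vector<armnn::BackendId> requested = { "GpuAcc", "CpuRef" };
    if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
    {
        throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
    }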
82 
83 namespace InferenceModelInternal
84 {
85 using BindingPointInfo = armnn::BindingPointInfo;
86 
87 using QuantizationParams = std::pair<float,int32_t>;
88 
89 struct Params
90 {
91  std::string m_ModelPath;
92  std::vector<std::string> m_InputBindings;
93  std::vector<armnn::TensorShape> m_InputShapes;
94  std::vector<std::string> m_OutputBindings;
95  std::vector<armnn::BackendId> m_ComputeDevices;
96  std::string m_DynamicBackendsPath;
97  size_t m_SubgraphId;
98  bool m_IsModelBinary;
99  bool m_VisualizePostOptimizationModel;
100  bool m_EnableFp16TurboMode;
101  bool m_EnableBf16TurboMode;
102  bool m_PrintIntermediateLayers;
103  bool m_ParseUnsupported;
104  bool m_InferOutputShape;
105  bool m_EnableFastMath;
106  bool m_SaveCachedNetwork;
107  bool m_OutputDetailsToStdOut;
108  bool m_OutputDetailsOnlyToStdOut;
109  std::string m_CachedNetworkFilePath;
110  unsigned int m_NumberOfThreads;
111  std::string m_MLGOTuningFilePath;
112  bool m_AsyncEnabled;
113  size_t m_ThreadPoolSize;
114 
115 
116  Params()
117  : m_ComputeDevices{}
118  , m_SubgraphId(0)
119  , m_IsModelBinary(true)
120  , m_VisualizePostOptimizationModel(false)
121  , m_EnableFp16TurboMode(false)
122  , m_EnableBf16TurboMode(false)
123  , m_PrintIntermediateLayers(false)
124  , m_ParseUnsupported(false)
125  , m_InferOutputShape(false)
126  , m_EnableFastMath(false)
127  , m_SaveCachedNetwork(false)
128  , m_OutputDetailsToStdOut(false)
129  , m_OutputDetailsOnlyToStdOut(false)
130  , m_CachedNetworkFilePath("")
131  , m_NumberOfThreads(0)
132  , m_MLGOTuningFilePath("")
133  , m_AsyncEnabled(false)
134  , m_ThreadPoolSize(0)
135  {}
136 };
137 
138 } // namespace InferenceModelInternal
139 
140 template <typename IParser>
141 struct CreateNetworkImpl
142 {
143 public:
144  using Params = InferenceModelInternal::Params;
145 
146  static armnn::INetworkPtr Create(const Params& params,
147  std::vector<armnn::BindingPointInfo>& inputBindings,
148  std::vector<armnn::BindingPointInfo>& outputBindings)
149  {
150  const std::string& modelPath = params.m_ModelPath;
151 
152  // Create a network from a file on disk
153  auto parser(IParser::Create());
154 
155  std::map<std::string, armnn::TensorShape> inputShapes;
156  if (!params.m_InputShapes.empty())
157  {
158  const size_t numInputShapes = params.m_InputShapes.size();
159  const size_t numInputBindings = params.m_InputBindings.size();
160  if (numInputShapes < numInputBindings)
161  {
162  throw armnn::Exception(fmt::format(
163  "Not every input has its tensor shape specified: expected={0}, got={1}",
164  numInputBindings, numInputShapes));
165  }
166 
167  for (size_t i = 0; i < numInputShapes; i++)
168  {
169  inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
170  }
171  }
172 
173  std::vector<std::string> requestedOutputs = params.m_OutputBindings;
174  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
175 
176  {
177  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
178  // Handle text and binary input differently by calling the corresponding parser function
179  network = (params.m_IsModelBinary ?
180  parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
181  parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
182  }
183 
184  for (const std::string& inputLayerName : params.m_InputBindings)
185  {
186  inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
187  }
188 
189  for (const std::string& outputLayerName : params.m_OutputBindings)
190  {
191  outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
192  }
193 
194  return network;
195  }
196 };
197 
198 #if defined(ARMNN_SERIALIZER)
199 template <>
200 struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
201 {
202 public:
203  using IParser = armnnDeserializer::IDeserializer;
204  using Params = InferenceModelInternal::Params;
205 
206  static armnn::INetworkPtr Create(const Params& params,
207  std::vector<armnn::BindingPointInfo>& inputBindings,
208  std::vector<armnn::BindingPointInfo>& outputBindings)
209  {
210  auto parser(IParser::Create());
211  ARMNN_ASSERT(parser);
212 
213  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
214 
215  {
216  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
217 
218  std::error_code errorCode;
219  fs::path pathToFile(params.m_ModelPath);
220  if (!fs::exists(pathToFile, errorCode))
221  {
222  throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
223  params.m_ModelPath,
224  errorCode.message(),
225  CHECK_LOCATION().AsString()));
226  }
227  std::ifstream file(params.m_ModelPath, std::ios::binary);
228 
229  network = parser->CreateNetworkFromBinary(file);
230  }
231 
232  unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);
233 
234  for (const std::string& inputLayerName : params.m_InputBindings)
235  {
236  armnnDeserializer::BindingPointInfo inputBinding =
237  parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
238  inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
239  }
240 
241  for (const std::string& outputLayerName : params.m_OutputBindings)
242  {
243  armnnDeserializer::BindingPointInfo outputBinding =
244  parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
245  outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
246  }
247 
248  return network;
249  }
250 };
251 #endif
252 
253 #if defined(ARMNN_TF_LITE_PARSER)
254 template <>
255 struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
256 {
257 public:
258  using IParser = armnnTfLiteParser::ITfLiteParser;
259  using Params = InferenceModelInternal::Params;
260 
261  static armnn::INetworkPtr Create(const Params& params,
262  std::vector<armnn::BindingPointInfo>& inputBindings,
263  std::vector<armnn::BindingPointInfo>& outputBindings)
264  {
265  const std::string& modelPath = params.m_ModelPath;
266 
267  // Create a network from a file on disk
268  IParser::TfLiteParserOptions options;
269  options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
270  options.m_InferAndValidate = params.m_InferOutputShape;
271  auto parser(IParser::Create(options));
272 
273  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
274 
275  {
276  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
277  network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
278  }
279 
280  for (const std::string& inputLayerName : params.m_InputBindings)
281  {
282  armnn::BindingPointInfo inputBinding =
283  parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
284  inputBindings.push_back(inputBinding);
285  }
286 
287  for (const std::string& outputLayerName : params.m_OutputBindings)
288  {
289  armnn::BindingPointInfo outputBinding =
290  parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
291  outputBindings.push_back(outputBinding);
292  }
293 
294  return network;
295  }
296 };
297 #endif
298 
299 #if defined(ARMNN_ONNX_PARSER)
300 template <>
301 struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
302 {
303 public:
304  using IParser = armnnOnnxParser::IOnnxParser;
305  using Params = InferenceModelInternal::Params;
306  using BindingPointInfo = InferenceModelInternal::BindingPointInfo;
307 
308  static armnn::INetworkPtr Create(const Params& params,
309  std::vector<BindingPointInfo>& inputBindings,
310  std::vector<BindingPointInfo>& outputBindings)
311  {
312  const std::string& modelPath = params.m_ModelPath;
313 
314  // Create a network from a file on disk
315  auto parser(IParser::Create());
316 
317  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
318 
319  std::map<std::string, armnn::TensorShape> inputShapes;
320  if (!params.m_InputShapes.empty())
321  {
322  const size_t numInputShapes = params.m_InputShapes.size();
323  const size_t numInputBindings = params.m_InputBindings.size();
324  if (numInputShapes < numInputBindings)
325  {
326  throw armnn::Exception(fmt::format(
327  "Not every input has its tensor shape specified: expected={0}, got={1}",
328  numInputBindings, numInputShapes));
329  }
330 
331  for (size_t i = 0; i < numInputShapes; i++)
332  {
333  inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
334  }
335 
336  {
337  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
338  network = (params.m_IsModelBinary ?
339  parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
340  parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
341  }
342  }
343 
344  else
345  {
346  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
347  network = (params.m_IsModelBinary ?
348  parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
349  parser->CreateNetworkFromTextFile(modelPath.c_str()));
350  }
351 
352  for (const std::string& inputLayerName : params.m_InputBindings)
353  {
354  BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
355  inputBindings.push_back(inputBinding);
356  }
357 
358  for (const std::string& outputLayerName : params.m_OutputBindings)
359  {
360  BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
361  outputBindings.push_back(outputBinding);
362  }
363 
364  return network;
365  }
366 };
367 #endif
368 
369 
370 
371 template <typename IParser, typename TDataType>
372 class InferenceModel
373 {
374 public:
375  using DataType = TDataType;
376  using Params = InferenceModelInternal::Params;
377  using QuantizationParams = InferenceModelInternal::QuantizationParams;
378 
379 
380  struct CommandLineOptions
381  {
382  std::string m_ModelDir;
383  std::vector<std::string> m_ComputeDevices;
384  std::string m_DynamicBackendsPath;
385  bool m_VisualizePostOptimizationModel;
386  bool m_EnableFp16TurboMode;
387  bool m_EnableBf16TurboMode;
388  std::string m_Labels;
389 
390  std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
391  {
392  std::vector<armnn::BackendId> backendIds;
393  std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
394  return backendIds;
395  }
396  };
397 
398  static void AddCommandLineOptions(cxxopts::Options& options,
399  CommandLineOptions& cLineOptions, std::vector<std::string>& required)
400  {
401  const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };
402 
403  const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
404  + armnn::BackendRegistryInstance().GetBackendIdsAsString();
405 
406  options
407  .allow_unrecognised_options()
408  .add_options()
409  ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
410  cxxopts::value<std::string>(cLineOptions.m_ModelDir))
411  ("c,compute", backendsMessage.c_str(),
412  cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
413  ("b,dynamic-backends-path",
414  "Path where to load any available dynamic backend from. "
415  "If left empty (the default), dynamic backends will not be used.",
416  cxxopts::value(cLineOptions.m_DynamicBackendsPath))
417  ("l,labels",
418  "Text file containing one image filename - correct label pair per line, "
419  "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
420  ("v,visualize-optimized-model",
421  "Produce a dot file useful for visualizing the graph post optimization. "
422  "The file will have the same name as the model with the .dot extension.",
423  cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
424  ("fp16-turbo-mode",
425  "If this option is enabled FP32 layers, weights and biases will be converted "
426  "to FP16 where the backend supports it.",
427  cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
428  ("bf16-turbo-mode",
429  "If this option is enabled FP32 layers, weights and biases will be converted "
430  "to BF16 where the backend supports it.",
431  cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));
432 
433  required.emplace_back("model-dir");
434  }
435 
436  InferenceModel(const Params& params,
437  bool enableProfiling,
438  const std::string& dynamicBackendsPath,
439  const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
440  : m_EnableProfiling(enableProfiling),
441  m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined)
442  , m_DynamicBackendsPath(dynamicBackendsPath)
443  {
444  if (runtime)
445  {
446  m_Runtime = runtime;
447  }
448  else
449  {
450  armnn::IRuntime::CreationOptions options;
451  options.m_EnableGpuProfiling = m_EnableProfiling;
452  options.m_DynamicBackendsPath = m_DynamicBackendsPath;
453  m_Runtime = armnn::IRuntime::Create(options);
454  }
455 
456  // Configure the Profiler if the profiling details are opted for
457  if (params.m_OutputDetailsOnlyToStdOut)
458  m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
459  else if (params.m_OutputDetailsToStdOut)
460  m_ProfilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
461 
462  std::string invalidBackends;
463  if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
464  {
465  throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
466  }
467 
468  armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
469  {
470  const auto parsing_start_time = armnn::GetTimeNow();
471  armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);
472 
473  ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
474  << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
475 
476  ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
477 
478  armnn::OptimizerOptions options;
479  options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
480  options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
481  options.m_Debug = params.m_PrintIntermediateLayers;
482  options.m_shapeInferenceMethod = params.m_InferOutputShape ?
483  armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly;
484  options.m_ProfilingEnabled = m_EnableProfiling;
485 
486  armnn::BackendOptions gpuAcc("GpuAcc",
487  {
488  { "FastMathEnabled", params.m_EnableFastMath },
489  { "SaveCachedNetwork", params.m_SaveCachedNetwork },
490  { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
491  { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
492  });
493 
494  armnn::BackendOptions cpuAcc("CpuAcc",
495  {
496  { "FastMathEnabled", params.m_EnableFastMath },
497  { "NumberOfThreads", params.m_NumberOfThreads }
498  });
499  options.m_ModelOptions.push_back(gpuAcc);
500  options.m_ModelOptions.push_back(cpuAcc);
501 
502  const auto optimization_start_time = armnn::GetTimeNow();
503  optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
504 
505  ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
506  << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
507 
508  if (!optNet)
509  {
510  throw armnn::Exception("Optimize returned nullptr");
511  }
512 
513 
514  }
515 
516  if (params.m_VisualizePostOptimizationModel)
517  {
518  fs::path filename = params.m_ModelPath;
519  filename.replace_extension("dot");
520  std::fstream file(filename.c_str(), std::ios_base::out);
521  optNet->SerializeToDot(file);
522  }
523 
524  armnn::Status ret;
525  {
526  ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
527 
528  const auto loading_start_time = armnn::GetTimeNow();
529  armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
530  armnn::MemorySource::Undefined,
531  armnn::MemorySource::Undefined,
532  enableProfiling,
533  m_ProfilingDetailsMethod);
534  std::string errorMessage;
535  ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
536 
537  ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
538  << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";
539 
540  if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
541  {
542  std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
543  for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
544  {
545  memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
546  }
547 
548  m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
549  m_Runtime.get(),
550  memHandles);
551  }
552  }
553 
554  if (ret == armnn::Status::Failure)
555  {
556  throw armnn::Exception("IRuntime::LoadNetwork failed");
557  }
558  }
559 
560  void CheckInputIndexIsValid(unsigned int inputIndex) const
561  {
562  if (m_InputBindings.size() < inputIndex + 1)
563  {
564  throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
565  }
566  }
567 
568  void CheckOutputIndexIsValid(unsigned int outputIndex) const
569  {
570  if (m_OutputBindings.size() < outputIndex + 1)
571  {
572  throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
573  }
574  }
575 
576  unsigned int GetInputSize(unsigned int inputIndex = 0u) const
577  {
578  CheckInputIndexIsValid(inputIndex);
579  return m_InputBindings[inputIndex].second.GetNumElements();
580  }
581 
582  unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
583  {
584  CheckOutputIndexIsValid(outputIndex);
585  return m_OutputBindings[outputIndex].second.GetNumElements();
586  }
587 
588  std::chrono::duration<double, std::milli> Run(
589  const std::vector<armnnUtils::TContainer>& inputContainers,
590  std::vector<armnnUtils::TContainer>& outputContainers)
591  {
592  for (unsigned int i = 0; i < outputContainers.size(); ++i)
593  {
594  const unsigned int expectedOutputDataSize = GetOutputSize(i);
595 
596  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
597  {
598  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
599  if (actualOutputDataSize < expectedOutputDataSize)
600  {
601  unsigned int outputIndex = i;
602  throw armnn::Exception(
603  fmt::format("Not enough data for output #{0}: expected "
604  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
605  }
606  },
607  outputContainers[i]);
608  }
609 
610  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
611 
612  // Start timer to record inference time in EnqueueWorkload (in milliseconds)
613  const auto start_time = armnn::GetTimeNow();
614 
615  armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
616  MakeInputTensors(inputContainers),
617  MakeOutputTensors(outputContainers));
618  const auto duration = armnn::GetTimeDuration(start_time);
619 
620  // if profiling is enabled print out the results
621  if (profiler && profiler->IsProfilingEnabled())
622  {
623  profiler->Print(std::cout);
624  }
625 
626  if (ret == armnn::Status::Failure)
627  {
628  throw armnn::Exception("IRuntime::EnqueueWorkload failed");
629  }
630  else
631  {
632  return duration;
633  }
634  }
635 
636  std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
637  armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
638  const std::vector<armnnUtils::TContainer>& inputContainers,
639  std::vector<armnnUtils::TContainer>& outputContainers,
640  unsigned int inferenceID)
641  {
642  for (unsigned int i = 0; i < outputContainers.size(); ++i)
643  {
644  const unsigned int expectedOutputDataSize = GetOutputSize(i);
645 
646  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
647  {
648  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
649  if (actualOutputDataSize < expectedOutputDataSize)
650  {
651  unsigned int outputIndex = i;
652  throw armnn::Exception(
653  fmt::format("Not enough data for output #{0}: expected "
654  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
655  }
656  },
657  outputContainers[i]);
658  }
659 
660  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
661 
662  // Start timer to record inference time in Execute (in milliseconds)
663  const auto start_time = armnn::GetTimeNow();
664 
665  armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
666  MakeInputTensors(inputContainers),
667  MakeOutputTensors(outputContainers));
668 
669  const auto duration = armnn::GetTimeDuration(start_time);
670 
671  // if profiling is enabled print out the results
672  if (profiler && profiler->IsProfilingEnabled())
673  {
674  profiler->Print(std::cout);
675  }
676 
677  if (ret == armnn::Status::Failure)
678  {
679  throw armnn::Exception(
680  fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
681  m_NetworkIdentifier, inferenceID));
682  }
683  else
684  {
685  return std::make_tuple(inferenceID, duration);
686  }
687  }
688 
689  void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
690  std::vector<armnnUtils::TContainer>& outputContainers,
691  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
692  {
693  for (unsigned int i = 0; i < outputContainers.size(); ++i)
694  {
695  const unsigned int expectedOutputDataSize = GetOutputSize(i);
696 
697  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
698  {
699  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
700  if (actualOutputDataSize < expectedOutputDataSize)
701  {
702  unsigned int outputIndex = i;
703  throw armnn::Exception(
704  fmt::format("Not enough data for output #{0}: expected "
705  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
706  }
707  },
708  outputContainers[i]);
709  }
710 
711  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
712 
713  m_Threadpool->Schedule(m_NetworkIdentifier,
714  MakeInputTensors(inputContainers),
715  MakeOutputTensors(outputContainers),
716  armnn::QosExecPriority::Medium,
717  cb);
718 
719  // if profiling is enabled print out the results
720  if (profiler && profiler->IsProfilingEnabled())
721  {
722  profiler->Print(std::cout);
723  }
724  }
725 
726  const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
727  {
728  CheckInputIndexIsValid(inputIndex);
729  return m_InputBindings[inputIndex];
730  }
731 
732  const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
733  {
734  return m_InputBindings;
735  }
736 
737  const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
738  {
739  CheckOutputIndexIsValid(outputIndex);
740  return m_OutputBindings[outputIndex];
741  }
742 
743  const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
744  {
745  return m_OutputBindings;
746  }
747 
748  QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
749  {
750  CheckOutputIndexIsValid(outputIndex);
751  return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
752  m_OutputBindings[outputIndex].second.GetQuantizationOffset());
753  }
754 
755  QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
756  {
757  CheckInputIndexIsValid(inputIndex);
758  return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
759  m_InputBindings[inputIndex].second.GetQuantizationOffset());
760  }
761 
762  std::vector<QuantizationParams> GetAllQuantizationParams() const
763  {
764  std::vector<QuantizationParams> quantizationParams;
765  for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
766  {
767  quantizationParams.push_back(GetQuantizationParams(i));
768  }
769  return quantizationParams;
770  }
771 
772  std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
773  {
774  return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
775  }
776 
777 private:
778  armnn::NetworkId m_NetworkIdentifier;
779  std::shared_ptr<armnn::IRuntime> m_Runtime;
780  std::unique_ptr<armnn::Threadpool> m_Threadpool;
781 
782  std::vector<armnn::BindingPointInfo> m_InputBindings;
783  std::vector<armnn::BindingPointInfo> m_OutputBindings;
784  bool m_EnableProfiling;
785  armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
786  std::string m_DynamicBackendsPath;
787 
788  template<typename TContainer>
789  armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
790  {
791  return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
792  }
793 
794  template<typename TContainer>
795  armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
796  {
797  return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
798  }
799 };
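A minimal usage sketch of the class above, assuming a TfLite parser build (ARMNN_TF_LITE_PARSER); the model path and tensor binding names are hypothetical placeholders:

    // Illustrative only: "model.tflite", "input" and "output" are placeholder names.
    using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;

    InferenceModelInternal::Params params;
    params.m_ModelPath      = "model.tflite";
    params.m_InputBindings  = { "input" };
    params.m_OutputBindings = { "output" };
    params.m_ComputeDevices = { "CpuAcc", "CpuRef" };

    // The constructor parses, optimizes and loads the network onto the requested backends.
    Model model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");

    // One container per input/output binding, sized from the bound tensor infos.
    std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
    std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };

    // Run() validates output buffer sizes, enqueues the workload and returns the measured duration in ms.
    auto inferenceTime = model.Run(inputs, outputs);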