ArmNN 21.08
InferenceModel.hpp
1 //
2 // Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 
6 #pragma once
7 
8 #include <armnn/ArmNN.hpp>
9 #include <armnn/Threadpool.hpp>
10 #include <armnn/Logging.hpp>
11 #include <armnn/utility/Timer.hpp>
12 #include <armnn/BackendRegistry.hpp>
13 #include <armnn/utility/Assert.hpp>
14 #include <armnn/utility/NumericCast.hpp>
15 #include <common/include/ProfilingGuid.hpp>
16 
17 #if defined(ARMNN_SERIALIZER)
18 #include "armnnDeserializer/IDeserializer.hpp"
19 #endif
20 #if defined(ARMNN_TF_LITE_PARSER)
21 #include "armnnTfLiteParser/ITfLiteParser.hpp"
22 #endif
23 #if defined(ARMNN_ONNX_PARSER)
24 #include "armnnOnnxParser/IOnnxParser.hpp"
25 #endif
26 
27 #include <armnnUtils/Filesystem.hpp>
28 #include <HeapProfiling.hpp>
29 #include <TensorIOUtils.hpp>
30 
32 #include <cxxopts/cxxopts.hpp>
33 #include "CxxoptsUtils.hpp"
34 #include <fmt/format.h>
35 #include <mapbox/variant.hpp>
36 
37 #include <algorithm>
38 #include <iterator>
39 #include <fstream>
40 #include <map>
41 #include <string>
42 #include <vector>
43 #include <type_traits>
44 
45 namespace
46 {
47 
48 inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
49  armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
50 {
51  if (backendIds.empty())
52  {
53  return false;
54  }
55 
56  armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();
57 
58  bool allValid = true;
59  for (const auto& backendId : backendIds)
60  {
61  if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
62  {
63  allValid = false;
64  if (invalidBackendIds)
65  {
66  if (!invalidBackendIds.value().empty())
67  {
68  invalidBackendIds.value() += ", ";
69  }
70  invalidBackendIds.value() += backendId;
71  }
72  }
73  }
74  return allValid;
75 }
76 
77 } // anonymous namespace
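
As an illustration only (mirroring how the InferenceModel constructor further down uses this helper), the rejected IDs can be collected through the optional out-parameter; the backend names here are placeholders:

std::string invalidBackends;
std::vector<armnn::BackendId> requested = { "CpuAcc", "GpuAcc" };
if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
{
    ARMNN_LOG(warning) << "Unsupported backend IDs: " << invalidBackends;
}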
78 
79 namespace InferenceModelInternal
80 {
81 using BindingPointInfo = armnn::BindingPointInfo;
82 
83 using QuantizationParams = std::pair<float,int32_t>;
84 
85 struct Params
86 {
87  std::string m_ModelPath;
88  std::vector<std::string> m_InputBindings;
89  std::vector<armnn::TensorShape> m_InputShapes;
90  std::vector<std::string> m_OutputBindings;
91  std::vector<armnn::BackendId> m_ComputeDevices;
92  std::string m_DynamicBackendsPath;
93  size_t m_SubgraphId;
94  bool m_IsModelBinary;
95  bool m_VisualizePostOptimizationModel;
96  bool m_EnableFp16TurboMode;
97  bool m_EnableBf16TurboMode;
98  bool m_PrintIntermediateLayers;
99  bool m_ParseUnsupported;
100  bool m_InferOutputShape;
101  bool m_EnableFastMath;
102  bool m_SaveCachedNetwork;
103  bool m_OutputDetailsToStdOut;
104  std::string m_CachedNetworkFilePath;
105  unsigned int m_NumberOfThreads;
106  std::string m_MLGOTuningFilePath;
107  bool m_AsyncEnabled;
108  size_t m_ThreadPoolSize;
109 
110 
111  Params()
112  : m_ComputeDevices{}
113  , m_SubgraphId(0)
114  , m_IsModelBinary(true)
115  , m_VisualizePostOptimizationModel(false)
116  , m_EnableFp16TurboMode(false)
117  , m_EnableBf16TurboMode(false)
118  , m_PrintIntermediateLayers(false)
119  , m_ParseUnsupported(false)
120  , m_InferOutputShape(false)
121  , m_EnableFastMath(false)
122  , m_SaveCachedNetwork(false)
123  , m_OutputDetailsToStdOut(false)
124  , m_CachedNetworkFilePath("")
125  , m_NumberOfThreads(0)
126  , m_MLGOTuningFilePath("")
127  , m_AsyncEnabled(false)
128  , m_ThreadPoolSize(0)
129  {}
130 };
131 
132 } // namespace InferenceModelInternal
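
The Params struct above is the single configuration object consumed by both CreateNetworkImpl and InferenceModel below. As a sketch only (the model path, binding names and option values are placeholders, not defaults), it might be filled in like this:

InferenceModelInternal::Params params;
params.m_ModelPath           = "network.tflite";   // placeholder path
params.m_InputBindings       = { "input" };
params.m_OutputBindings      = { "output" };
params.m_ComputeDevices      = { "GpuAcc", "CpuRef" };
params.m_EnableFp16TurboMode = true;   // ask backends to run FP32 layers in FP16 where supported
params.m_EnableFastMath      = true;   // forwarded to GpuAcc/CpuAcc as "FastMathEnabled"
params.m_NumberOfThreads     = 4;      // forwarded to CpuAcc as "NumberOfThreads"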
133 
134 template <typename IParser>
135 struct CreateNetworkImpl
136 {
137 public:
138  using Params = InferenceModelInternal::Params;
139 
140  static armnn::INetworkPtr Create(const Params& params,
141  std::vector<armnn::BindingPointInfo>& inputBindings,
142  std::vector<armnn::BindingPointInfo>& outputBindings)
143  {
144  const std::string& modelPath = params.m_ModelPath;
145 
146  // Create a network from a file on disk
147  auto parser(IParser::Create());
148 
149  std::map<std::string, armnn::TensorShape> inputShapes;
150  if (!params.m_InputShapes.empty())
151  {
152  const size_t numInputShapes = params.m_InputShapes.size();
153  const size_t numInputBindings = params.m_InputBindings.size();
154  if (numInputShapes < numInputBindings)
155  {
156  throw armnn::Exception(fmt::format(
157  "Not every input has its tensor shape specified: expected={0}, got={1}",
158  numInputBindings, numInputShapes));
159  }
160 
161  for (size_t i = 0; i < numInputShapes; i++)
162  {
163  inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
164  }
165  }
166 
167  std::vector<std::string> requestedOutputs = params.m_OutputBindings;
168  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
169 
170  {
171  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
172  // Handle text and binary input differently by calling the corresponding parser function
173  network = (params.m_IsModelBinary ?
174  parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
175  parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
176  }
177 
178  for (const std::string& inputLayerName : params.m_InputBindings)
179  {
180  inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
181  }
182 
183  for (const std::string& outputLayerName : params.m_OutputBindings)
184  {
185  outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
186  }
187 
188  return network;
189  }
190 };
191 
192 #if defined(ARMNN_SERIALIZER)
193 template <>
194 struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
195 {
196 public:
197  using IParser = armnnDeserializer::IDeserializer;
198  using Params = InferenceModelInternal::Params;
199 
200  static armnn::INetworkPtr Create(const Params& params,
201  std::vector<armnn::BindingPointInfo>& inputBindings,
202  std::vector<armnn::BindingPointInfo>& outputBindings)
203  {
204  auto parser(IParser::Create());
205  ARMNN_ASSERT(parser);
206 
207  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
208 
209  {
210  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
211 
212  std::error_code errorCode;
213  fs::path pathToFile(params.m_ModelPath);
214  if (!fs::exists(pathToFile, errorCode))
215  {
216  throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1} {2}",
217  params.m_ModelPath,
218  errorCode.message(),
219  CHECK_LOCATION().AsString()));
220  }
221  std::ifstream file(params.m_ModelPath, std::ios::binary);
222 
223  network = parser->CreateNetworkFromBinary(file);
224  }
225 
226  unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);
227 
228  for (const std::string& inputLayerName : params.m_InputBindings)
229  {
230  armnnDeserializer::BindingPointInfo inputBinding =
231  parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
232  inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
233  }
234 
235  for (const std::string& outputLayerName : params.m_OutputBindings)
236  {
237  armnnDeserializer::BindingPointInfo outputBinding =
238  parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
239  outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
240  }
241 
242  return network;
243  }
244 };
245 #endif
246 
247 #if defined(ARMNN_TF_LITE_PARSER)
248 template <>
249 struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
250 {
251 public:
252  using IParser = armnnTfLiteParser::ITfLiteParser;
253  using Params = InferenceModelInternal::Params;
254 
255  static armnn::INetworkPtr Create(const Params& params,
256  std::vector<armnn::BindingPointInfo>& inputBindings,
257  std::vector<armnn::BindingPointInfo>& outputBindings)
258  {
259  const std::string& modelPath = params.m_ModelPath;
260 
261  // Create a network from a file on disk
262  IParser::TfLiteParserOptions options;
263  options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
264  options.m_InferAndValidate = params.m_InferOutputShape;
265  auto parser(IParser::Create(options));
266 
267  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
268 
269  {
270  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
271  network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
272  }
273 
274  for (const std::string& inputLayerName : params.m_InputBindings)
275  {
276  armnn::BindingPointInfo inputBinding =
277  parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
278  inputBindings.push_back(inputBinding);
279  }
280 
281  for (const std::string& outputLayerName : params.m_OutputBindings)
282  {
283  armnn::BindingPointInfo outputBinding =
284  parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
285  outputBindings.push_back(outputBinding);
286  }
287 
288  return network;
289  }
290 };
291 #endif
292 
293 #if defined(ARMNN_ONNX_PARSER)
294 template <>
295 struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
296 {
297 public:
298  using IParser = armnnOnnxParser::IOnnxParser;
299  using Params = InferenceModelInternal::Params;
300  using BindingPointInfo = InferenceModelInternal::BindingPointInfo;
301 
302  static armnn::INetworkPtr Create(const Params& params,
303  std::vector<BindingPointInfo>& inputBindings,
304  std::vector<BindingPointInfo>& outputBindings)
305  {
306  const std::string& modelPath = params.m_ModelPath;
307 
308  // Create a network from a file on disk
309  auto parser(IParser::Create());
310 
311  armnn::INetworkPtr network{nullptr, [](armnn::INetwork *){}};
312 
313  {
314  ARMNN_SCOPED_HEAP_PROFILING("Parsing");
315  network = (params.m_IsModelBinary ?
316  parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
317  parser->CreateNetworkFromTextFile(modelPath.c_str()));
318  }
319 
320  for (const std::string& inputLayerName : params.m_InputBindings)
321  {
322  BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
323  inputBindings.push_back(inputBinding);
324  }
325 
326  for (const std::string& outputLayerName : params.m_OutputBindings)
327  {
328  BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
329  outputBindings.push_back(outputBinding);
330  }
331 
332  return network;
333  }
334 };
335 #endif
336 
337 
338 
339 template <typename IParser, typename TDataType>
340 class InferenceModel
341 {
342 public:
343  using DataType = TDataType;
344  using Params = InferenceModelInternal::Params;
345  using QuantizationParams = InferenceModelInternal::QuantizationParams;
346  using TContainer
347  = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t>>;
348 
349  struct CommandLineOptions
350  {
351  std::string m_ModelDir;
352  std::vector<std::string> m_ComputeDevices;
353  std::string m_DynamicBackendsPath;
354  bool m_VisualizePostOptimizationModel;
355  bool m_EnableFp16TurboMode;
356  bool m_EnableBf16TurboMode;
357  std::string m_Labels;
358 
359  std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
360  {
361  std::vector<armnn::BackendId> backendIds;
362  std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
363  return backendIds;
364  }
365  };
366 
367  static void AddCommandLineOptions(cxxopts::Options& options,
368  CommandLineOptions& cLineOptions, std::vector<std::string>& required)
369  {
370  const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };
371 
372  const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
373  + armnn::BackendRegistryInstance().GetBackendIdsAsString();
374 
375  options
376  .allow_unrecognised_options()
377  .add_options()
378  ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
379  cxxopts::value<std::string>(cLineOptions.m_ModelDir))
380  ("c,compute", backendsMessage.c_str(),
381  cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
382  ("b,dynamic-backends-path",
383  "Path where to load any available dynamic backend from. "
384  "If left empty (the default), dynamic backends will not be used.",
385  cxxopts::value(cLineOptions.m_DynamicBackendsPath))
386  ("l,labels",
387  "Text file containing one image filename - correct label pair per line, "
388  "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
389  ("v,visualize-optimized-model",
390  "Produce a dot file useful for visualizing the graph post optimization."
391  "The file will have the same name as the model with the .dot extention.",
392  cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
393  ("fp16-turbo-mode",
394  "If this option is enabled FP32 layers, weights and biases will be converted "
395  "to FP16 where the backend supports it.",
396  cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
397  ("bf16-turbo-mode",
398  "If this option is enabled FP32 layers, weights and biases will be converted "
399  "to BF16 where the backend supports it.",
400  cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));
401 
402  required.emplace_back("model-dir");
403  }
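
As a usage sketch only (the program name, argc/argv and the parser instantiation are assumptions, not part of this header), a command-line tool would typically invoke this static helper on a concrete instantiation of the class and then parse:

cxxopts::Options options("ExampleApp", "Runs a model through InferenceModel");
InferenceModel<armnnTfLiteParser::ITfLiteParser, float>::CommandLineOptions cliOptions;
std::vector<std::string> required;

InferenceModel<armnnTfLiteParser::ITfLiteParser, float>::AddCommandLineOptions(options, cliOptions, required);

auto parsed = options.parse(argc, argv);   // cliOptions members are now populated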
404 
405  InferenceModel(const Params& params,
406  bool enableProfiling,
407  const std::string& dynamicBackendsPath,
408  const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
409  : m_EnableProfiling(enableProfiling)
410  , m_DynamicBackendsPath(dynamicBackendsPath)
411  {
412  if (runtime)
413  {
414  m_Runtime = runtime;
415  }
416  else
417  {
418  armnn::IRuntime::CreationOptions options;
419  options.m_EnableGpuProfiling = m_EnableProfiling;
420  options.m_DynamicBackendsPath = m_DynamicBackendsPath;
421  m_Runtime = armnn::IRuntime::Create(options);
422  }
423 
424  std::string invalidBackends;
425  if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
426  {
427  throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
428  }
429 
430  armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
431  {
432  const auto parsing_start_time = armnn::GetTimeNow();
433  armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);
434 
435  ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
436  << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
437 
438  ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
439 
440  armnn::OptimizerOptions options;
441  options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
442  options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
443  options.m_Debug = params.m_PrintIntermediateLayers;
444 
445  options.m_shapeInferenceMethod = params.m_InferOutputShape ?
446  armnn::ShapeInferenceMethod::InferAndValidate : armnn::ShapeInferenceMethod::ValidateOnly;
447 
448  armnn::BackendOptions gpuAcc("GpuAcc",
449  {
450  { "FastMathEnabled", params.m_EnableFastMath },
451  { "SaveCachedNetwork", params.m_SaveCachedNetwork },
452  { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
453  { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
454  });
455 
456  armnn::BackendOptions cpuAcc("CpuAcc",
457  {
458  { "FastMathEnabled", params.m_EnableFastMath },
459  { "NumberOfThreads", params.m_NumberOfThreads }
460  });
461  options.m_ModelOptions.push_back(gpuAcc);
462  options.m_ModelOptions.push_back(cpuAcc);
463 
464  const auto optimization_start_time = armnn::GetTimeNow();
465  optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
466 
467  ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
468  << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
469 
470  if (!optNet)
471  {
472  throw armnn::Exception("Optimize returned nullptr");
473  }
474 
475 
476  }
477 
478  if (params.m_VisualizePostOptimizationModel)
479  {
480  fs::path filename = params.m_ModelPath;
481  filename.replace_extension("dot");
482  std::fstream file(filename.c_str(), std::ios_base::out);
483  optNet->SerializeToDot(file);
484  }
485 
486  armnn::Status ret;
487  {
488  ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
489 
490  const auto loading_start_time = armnn::GetTimeNow();
491  armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
492  armnn::MemorySource::Undefined,
493  armnn::MemorySource::Undefined,
494  enableProfiling,
495  params.m_OutputDetailsToStdOut);
496  std::string errorMessage;
497  ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
498 
499  ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
500  << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";
501 
502  if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
503  {
504  std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
505  for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
506  {
507  memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
508  }
509 
510  m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
511  m_Runtime.get(),
512  memHandles);
513  }
514  }
515 
516  if (ret == armnn::Status::Failure)
517  {
518  throw armnn::Exception("IRuntime::LoadNetwork failed");
519  }
520  }
521 
522  void CheckInputIndexIsValid(unsigned int inputIndex) const
523  {
524  if (m_InputBindings.size() < inputIndex + 1)
525  {
526  throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
527  }
528  }
529 
530  void CheckOutputIndexIsValid(unsigned int outputIndex) const
531  {
532  if (m_OutputBindings.size() < outputIndex + 1)
533  {
534  throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
535  }
536  }
537 
538  unsigned int GetInputSize(unsigned int inputIndex = 0u) const
539  {
540  CheckInputIndexIsValid(inputIndex);
541  return m_InputBindings[inputIndex].second.GetNumElements();
542  }
543 
544  unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
545  {
546  CheckOutputIndexIsValid(outputIndex);
547  return m_OutputBindings[outputIndex].second.GetNumElements();
548  }
549 
550  std::chrono::duration<double, std::milli> Run(
551  const std::vector<TContainer>& inputContainers,
552  std::vector<TContainer>& outputContainers)
553  {
554  for (unsigned int i = 0; i < outputContainers.size(); ++i)
555  {
556  const unsigned int expectedOutputDataSize = GetOutputSize(i);
557 
558  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
559  {
560  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
561  if (actualOutputDataSize < expectedOutputDataSize)
562  {
563  unsigned int outputIndex = i;
564  throw armnn::Exception(
565  fmt::format("Not enough data for output #{0}: expected "
566  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
567  }
568  },
569  outputContainers[i]);
570  }
571 
572  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
573 
574  // Start timer to record inference time in EnqueueWorkload (in milliseconds)
575  const auto start_time = armnn::GetTimeNow();
576 
577  armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
578  MakeInputTensors(inputContainers),
579  MakeOutputTensors(outputContainers));
580  const auto duration = armnn::GetTimeDuration(start_time);
581 
582  // if profiling is enabled print out the results
583  if (profiler && profiler->IsProfilingEnabled())
584  {
585  profiler->Print(std::cout);
586  }
587 
588  if (ret == armnn::Status::Failure)
589  {
590  throw armnn::Exception("IRuntime::EnqueueWorkload failed");
591  }
592  else
593  {
594  return duration;
595  }
596  }
597 
598  std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
599  armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
600  const std::vector<TContainer>& inputContainers,
601  std::vector<TContainer>& outputContainers,
602  unsigned int inferenceID)
603  {
604  for (unsigned int i = 0; i < outputContainers.size(); ++i)
605  {
606  const unsigned int expectedOutputDataSize = GetOutputSize(i);
607 
608  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
609  {
610  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
611  if (actualOutputDataSize < expectedOutputDataSize)
612  {
613  unsigned int outputIndex = i;
614  throw armnn::Exception(
615  fmt::format("Not enough data for output #{0}: expected "
616  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
617  }
618  },
619  outputContainers[i]);
620  }
621 
622  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
623 
624  // Start timer to record inference time in EnqueueWorkload (in milliseconds)
625  const auto start_time = armnn::GetTimeNow();
626 
627  armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
628  MakeInputTensors(inputContainers),
629  MakeOutputTensors(outputContainers));
630 
631  const auto duration = armnn::GetTimeDuration(start_time);
632 
633  // if profiling is enabled print out the results
634  if (profiler && profiler->IsProfilingEnabled())
635  {
636  profiler->Print(std::cout);
637  }
638 
639  if (ret == armnn::Status::Failure)
640  {
641  throw armnn::Exception(
642  fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
643  m_NetworkIdentifier, inferenceID));
644  }
645  else
646  {
647  return std::make_tuple(inferenceID, duration);
648  }
649  }
650 
651  void RunAsync(const std::vector<TContainer>& inputContainers,
652  std::vector<TContainer>& outputContainers,
653  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
654  {
655  for (unsigned int i = 0; i < outputContainers.size(); ++i)
656  {
657  const unsigned int expectedOutputDataSize = GetOutputSize(i);
658 
659  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
660  {
661  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
662  if (actualOutputDataSize < expectedOutputDataSize)
663  {
664  unsigned int outputIndex = i;
665  throw armnn::Exception(
666  fmt::format("Not enough data for output #{0}: expected "
667  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
668  }
669  },
670  outputContainers[i]);
671  }
672 
673  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
674 
675  m_Threadpool->Schedule(m_NetworkIdentifier,
676  MakeInputTensors(inputContainers),
677  MakeOutputTensors(outputContainers),
678  armnn::QosExecPriority::Medium,
679  cb);
680 
681  // if profiling is enabled print out the results
682  if (profiler && profiler->IsProfilingEnabled())
683  {
684  profiler->Print(std::cout);
685  }
686  }
687 
688  const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
689  {
690  CheckInputIndexIsValid(inputIndex);
691  return m_InputBindings[inputIndex];
692  }
693 
694  const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
695  {
696  return m_InputBindings;
697  }
698 
699  const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
700  {
701  CheckOutputIndexIsValid(outputIndex);
702  return m_OutputBindings[outputIndex];
703  }
704 
705  const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
706  {
707  return m_OutputBindings;
708  }
709 
710  QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
711  {
712  CheckOutputIndexIsValid(outputIndex);
713  return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
714  m_OutputBindings[outputIndex].second.GetQuantizationOffset());
715  }
716 
717  QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
718  {
719  CheckInputIndexIsValid(inputIndex);
720  return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
721  m_InputBindings[inputIndex].second.GetQuantizationOffset());
722  }
723 
724  std::vector<QuantizationParams> GetAllQuantizationParams() const
725  {
726  std::vector<QuantizationParams> quantizationParams;
727  for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
728  {
729  quantizationParams.push_back(GetQuantizationParams(i));
730  }
731  return quantizationParams;
732  }
733 
734  std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
735  {
736  return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
737  }
738 
739 private:
740  armnn::NetworkId m_NetworkIdentifier;
741  std::shared_ptr<armnn::IRuntime> m_Runtime;
742  std::unique_ptr<armnn::Threadpool> m_Threadpool;
743 
744  std::vector<armnn::BindingPointInfo> m_InputBindings;
745  std::vector<armnn::BindingPointInfo> m_OutputBindings;
746  bool m_EnableProfiling;
747  std::string m_DynamicBackendsPath;
748 
749  template<typename TContainer>
750  armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
751  {
752  return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
753  }
754 
755  template<typename TContainer>
756  armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
757  {
758  return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
759  }
760 };
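
Putting it together: a minimal sketch (not part of this header) of loading a TensorFlow Lite model with the class above and running a single synchronous inference. It assumes ARMNN_TF_LITE_PARSER is defined and that a Params object has been populated as in the earlier sketch:

using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;

TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");

// One container per bound input/output; sizes come from the parsed tensor infos.
std::vector<TfLiteModel::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
std::vector<TfLiteModel::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };

auto inferenceTime = model.Run(inputs, outputs);   // std::chrono::duration in milliseconds
ARMNN_LOG(info) << "Inference took " << inferenceTime.count() << " ms";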