ArmNN 21.05
InferenceModel< IParser, TDataType > Class Template Reference

#include <InferenceModel.hpp>

Classes

struct  CommandLineOptions
 

Public Types

using DataType = TDataType
 
using Params = InferenceModelInternal::Params
 
using QuantizationParams = InferenceModelInternal::QuantizationParams
 
using TContainer = mapbox::util::variant< std::vector< float >, std::vector< int >, std::vector< unsigned char >, std::vector< int8_t > >
 

Public Member Functions

 InferenceModel (const Params &params, bool enableProfiling, const std::string &dynamicBackendsPath, const std::shared_ptr< armnn::IRuntime > &runtime=nullptr)
 
void CheckInputIndexIsValid (unsigned int inputIndex) const
 
void CheckOutputIndexIsValid (unsigned int outputIndex) const
 
unsigned int GetInputSize (unsigned int inputIndex=0u) const
 
unsigned int GetOutputSize (unsigned int outputIndex=0u) const
 
std::chrono::duration< double, std::milli > Run (const std::vector< TContainer > &inputContainers, std::vector< TContainer > &outputContainers)
 
std::tuple< armnn::profiling::ProfilingGuid, std::chrono::duration< double, std::milli > > RunAsync (armnn::experimental::IWorkingMemHandle &workingMemHandleRef, const std::vector< TContainer > &inputContainers, std::vector< TContainer > &outputContainers)
 
const armnn::BindingPointInfo & GetInputBindingInfo (unsigned int inputIndex=0u) const
 
const std::vector< armnn::BindingPointInfo > & GetInputBindingInfos () const
 
const armnn::BindingPointInfo & GetOutputBindingInfo (unsigned int outputIndex=0u) const
 
const std::vector< armnn::BindingPointInfo > & GetOutputBindingInfos () const
 
QuantizationParams GetQuantizationParams (unsigned int outputIndex=0u) const
 
QuantizationParams GetInputQuantizationParams (unsigned int inputIndex=0u) const
 
std::vector< QuantizationParams > GetAllQuantizationParams () const
 
std::unique_ptr< armnn::experimental::IWorkingMemHandle > CreateWorkingMemHandle ()
 

Static Public Member Functions

static void AddCommandLineOptions (cxxopts::Options &options, CommandLineOptions &cLineOptions, std::vector< std::string > &required)
 

Detailed Description

template<typename IParser, typename TDataType>
class InferenceModel< IParser, TDataType >

Definition at line 334 of file InferenceModel.hpp.

Member Typedef Documentation

◆ DataType

using DataType = TDataType

Definition at line 337 of file InferenceModel.hpp.

◆ Params

using Params = InferenceModelInternal::Params

Definition at line 338 of file InferenceModel.hpp.

◆ QuantizationParams

using QuantizationParams = InferenceModelInternal::QuantizationParams

◆ TContainer

using TContainer = mapbox::util::variant<std::vector<float>, std::vector<int>, std::vector<unsigned char>, std::vector<int8_t> >

Definition at line 341 of file InferenceModel.hpp.
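
Each TContainer holds the data for one bound tensor as whichever std::vector alternative matches the tensor's element type. A minimal sketch, assuming an illustrative instantiation InferenceModel<armnnTfLiteParser::ITfLiteParser, float> named model with a single FP32 input and output:

    using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;  // parser and data type are assumptions

    // One container per input tensor; the variant alternative must match the tensor's element type.
    std::vector<Model::TContainer> inputs;
    inputs.push_back(std::vector<float>(model.GetInputSize(0), 0.0f));      // zero-filled placeholder data

    // Output containers must be pre-sized; Run() checks they hold at least GetOutputSize(i) elements.
    std::vector<Model::TContainer> outputs;
    outputs.push_back(std::vector<float>(model.GetOutputSize(0)));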

Constructor & Destructor Documentation

◆ InferenceModel()

InferenceModel ( const Params &  params,
bool  enableProfiling,
const std::string &  dynamicBackendsPath,
const std::shared_ptr< armnn::IRuntime > &  runtime = nullptr 
)
inline

Definition at line 399 of file InferenceModel.hpp.

References ARMNN_LOG, ARMNN_SCOPED_HEAP_PROFILING, CreateNetworkImpl< IParser >::Create(), IRuntime::Create(), armnn::Failure, armnn::GetTimeDuration(), armnn::GetTimeNow(), Params::m_AsyncEnabled, Params::m_CachedNetworkFilePath, Params::m_ComputeDevices, OptimizerOptions::m_Debug, Params::m_DynamicBackendsPath, IRuntime::CreationOptions::m_DynamicBackendsPath, Params::m_EnableBf16TurboMode, Params::m_EnableFastMath, Params::m_EnableFp16TurboMode, IRuntime::CreationOptions::m_EnableGpuProfiling, Params::m_InputBindings, Params::m_MLGOTuningFilePath, OptimizerOptions::m_ModelOptions, Params::m_ModelPath, Params::m_NumberOfThreads, Params::m_OutputBindings, Params::m_PrintIntermediateLayers, OptimizerOptions::m_ReduceFp32ToBf16, OptimizerOptions::m_ReduceFp32ToFp16, Params::m_SaveCachedNetwork, Params::m_VisualizePostOptimizationModel, armnn::Optimize(), and armnn::Undefined.

403  : m_EnableProfiling(enableProfiling)
404  , m_DynamicBackendsPath(dynamicBackendsPath)
405  {
406  if (runtime)
407  {
408  m_Runtime = runtime;
409  }
410  else
411  {
412  armnn::IRuntime::CreationOptions options;
413  options.m_EnableGpuProfiling = m_EnableProfiling;
414  options.m_DynamicBackendsPath = m_DynamicBackendsPath;
415  m_Runtime = std::move(armnn::IRuntime::Create(options));
416  }
417 
418  std::string invalidBackends;
419  if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
420  {
421  throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
422  }
423 
424  armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
425  {
426  const auto parsing_start_time = armnn::GetTimeNow();
427  armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);
428 
429  ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
430  << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms\n";
431 
432  ARMNN_SCOPED_HEAP_PROFILING("Optimizing");
433 
434  armnn::OptimizerOptions options;
435  options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
436  options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
437  options.m_Debug = params.m_PrintIntermediateLayers;
438 
439  armnn::BackendOptions gpuAcc("GpuAcc",
440  {
441  { "FastMathEnabled", params.m_EnableFastMath },
442  { "SaveCachedNetwork", params.m_SaveCachedNetwork },
443  { "CachedNetworkFilePath", params.m_CachedNetworkFilePath },
444  { "MLGOTuningFilePath", params.m_MLGOTuningFilePath }
445  });
446 
447  armnn::BackendOptions cpuAcc("CpuAcc",
448  {
449  { "FastMathEnabled", params.m_EnableFastMath },
450  { "NumberOfThreads", params.m_NumberOfThreads }
451  });
452  options.m_ModelOptions.push_back(gpuAcc);
453  options.m_ModelOptions.push_back(cpuAcc);
454 
455  const auto optimization_start_time = armnn::GetTimeNow();
456  optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
457 
458  ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
459  << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
460 
461  if (!optNet)
462  {
463  throw armnn::Exception("Optimize returned nullptr");
464  }
465 
466 
467  }
468 
469  if (params.m_VisualizePostOptimizationModel)
470  {
471  fs::path filename = params.m_ModelPath;
472  filename.replace_extension("dot");
473  std::fstream file(filename.c_str(), std::ios_base::out);
474  optNet->SerializeToDot(file);
475  }
476 
477  armnn::Status ret;
478  {
479  ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
480 
481  const auto loading_start_time = armnn::GetTimeNow();
482  armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
483  armnn::MemorySource::Undefined,
484  armnn::MemorySource::Undefined);
485  std::string errorMessage;
486  ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
487 
488  ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
489  << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms\n";
490  }
491 
492  if (ret == armnn::Status::Failure)
493  {
494  throw armnn::Exception("IRuntime::LoadNetwork failed");
495  }
496  }
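A hedged sketch of constructing an InferenceModel from a Params object. The TfLite parser, model path and binding names are illustrative assumptions; only Params members referenced above are set, and the runtime argument is left at its nullptr default so the constructor creates one internally:

    #include "InferenceModel.hpp"   // brings in InferenceModel, Params and the parsers enabled at build time

    using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;

    TfLiteModel::Params params;
    params.m_ModelPath      = "model.tflite";          // illustrative path
    params.m_InputBindings  = { "input" };             // illustrative binding names
    params.m_OutputBindings = { "output" };
    params.m_ComputeDevices = { "CpuAcc", "CpuRef" };  // CpuRef acts as the fallback backend

    // enableProfiling = false, no extra dynamic backends path, default runtime created internally.
    TfLiteModel model(params, false, "");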

Member Function Documentation

◆ AddCommandLineOptions()

static void AddCommandLineOptions ( cxxopts::Options &  options,
CommandLineOptions &  cLineOptions,
std::vector< std::string > &  required 
)
inlinestatic

Definition at line 361 of file InferenceModel.hpp.

References armnn::BackendRegistryInstance(), BackendRegistry::GetBackendIdsAsString(), InferenceModel< IParser, TDataType >::CommandLineOptions::m_ComputeDevices, InferenceModel< IParser, TDataType >::CommandLineOptions::m_DynamicBackendsPath, InferenceModel< IParser, TDataType >::CommandLineOptions::m_EnableBf16TurboMode, InferenceModel< IParser, TDataType >::CommandLineOptions::m_EnableFp16TurboMode, InferenceModel< IParser, TDataType >::CommandLineOptions::m_Labels, InferenceModel< IParser, TDataType >::CommandLineOptions::m_ModelDir, and InferenceModel< IParser, TDataType >::CommandLineOptions::m_VisualizePostOptimizationModel.

Referenced by ClassifierTestCaseProvider< TDatabase, InferenceModel >::AddCommandLineOptions().

363  {
364  const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };
365 
366  const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
367  + armnn::BackendRegistryInstance().GetBackendIdsAsString();
368 
369  options
370  .allow_unrecognised_options()
371  .add_options()
372  ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
373  cxxopts::value<std::string>(cLineOptions.m_ModelDir))
374  ("c,compute", backendsMessage.c_str(),
375  cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
376  ("b,dynamic-backends-path",
377  "Path where to load any available dynamic backend from. "
378  "If left empty (the default), dynamic backends will not be used.",
379  cxxopts::value(cLineOptions.m_DynamicBackendsPath))
380  ("l,labels",
381  "Text file containing one image filename - correct label pair per line, "
382  "used to test the accuracy of the network.", cxxopts::value<std::string>(cLineOptions.m_Labels))
383  ("v,visualize-optimized-model",
384  "Produce a dot file useful for visualizing the graph post optimization."
385  "The file will have the same name as the model with the .dot extention.",
386  cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
387  ("fp16-turbo-mode",
388  "If this option is enabled FP32 layers, weights and biases will be converted "
389  "to FP16 where the backend supports it.",
390  cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
391  ("bf16-turbo-mode",
392  "If this option is enabled FP32 layers, weights and biases will be converted "
393  "to BF16 where the backend supports it.",
394  cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));
395 
396  required.emplace_back("model-dir");
397  }
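A hedged sketch of how a command line front end might use this helper together with cxxopts; the program name, description and later use of cLineOptions are illustrative:

    using Model = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;   // parser choice is an assumption

    int main(int argc, char* argv[])
    {
        cxxopts::Options options("MyInferenceApp", "Runs a network with InferenceModel");

        Model::CommandLineOptions cLineOptions;
        std::vector<std::string> required;
        Model::AddCommandLineOptions(options, cLineOptions, required);

        auto result = options.parse(argc, argv);   // fills cLineOptions through the bound cxxopts values

        // cLineOptions.m_ModelDir, m_ComputeDevices, m_DynamicBackendsPath, ... can now be
        // copied into a Model::Params object before constructing the model.
        return 0;
    }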

◆ CheckInputIndexIsValid()

void CheckInputIndexIsValid ( unsigned int  inputIndex) const
inline

Definition at line 498 of file InferenceModel.hpp.

References Params::m_InputBindings.

499  {
500  if (m_InputBindings.size() < inputIndex + 1)
501  {
502  throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
503  }
504  }

◆ CheckOutputIndexIsValid()

void CheckOutputIndexIsValid ( unsigned int  outputIndex) const
inline

Definition at line 506 of file InferenceModel.hpp.

References Params::m_OutputBindings.

507  {
508  if (m_OutputBindings.size() < outputIndex + 1)
509  {
510  throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
511  }
512  }

◆ CreateWorkingMemHandle()

std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle ( )
inline

Definition at line 681 of file InferenceModel.hpp.

References Params::m_DynamicBackendsPath, Params::m_InputBindings, Params::m_OutputBindings, armnnUtils::MakeInputTensors(), MakeInputTensors(), armnnUtils::MakeOutputTensors(), and MakeOutputTensors().

Referenced by MainImpl().

682  {
683  return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
684  }

◆ GetAllQuantizationParams()

std::vector<QuantizationParams> GetAllQuantizationParams ( ) const
inline

Definition at line 671 of file InferenceModel.hpp.

References Params::m_OutputBindings.

672  {
673  std::vector<QuantizationParams> quantizationParams;
674  for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
675  {
676  quantizationParams.push_back(GetQuantizationParams(i));
677  }
678  return quantizationParams;
679  }

◆ GetInputBindingInfo()

const armnn::BindingPointInfo& GetInputBindingInfo ( unsigned int  inputIndex = 0u) const
inline

Definition at line 635 of file InferenceModel.hpp.

References Params::m_InputBindings.

Referenced by main().

636  {
637  CheckInputIndexIsValid(inputIndex);
638  return m_InputBindings[inputIndex];
639  }
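armnn::BindingPointInfo pairs a layer binding id with the tensor's armnn::TensorInfo, so the binding info can be used to inspect an input before filling its container. A small sketch (model is an existing InferenceModel instance):

    const armnn::BindingPointInfo& inputBinding = model.GetInputBindingInfo(0);

    armnn::LayerBindingId bindingId = inputBinding.first;    // id of the bound input layer
    const armnn::TensorInfo& info   = inputBinding.second;   // shape, data type and quantization info

    unsigned int numElements = info.GetNumElements();
    armnn::DataType dataType = info.GetDataType();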

◆ GetInputBindingInfos()

const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos ( ) const
inline

Definition at line 641 of file InferenceModel.hpp.

References Params::m_InputBindings.

642  {
643  return m_InputBindings;
644  }

◆ GetInputQuantizationParams()

QuantizationParams GetInputQuantizationParams ( unsigned int  inputIndex = 0u) const
inline

Definition at line 664 of file InferenceModel.hpp.

References Params::m_InputBindings.

Referenced by MainImpl().

665  {
666  CheckInputIndexIsValid(inputIndex);
667  return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
668  m_InputBindings[inputIndex].second.GetQuantizationOffset());
669  }

◆ GetInputSize()

unsigned int GetInputSize ( unsigned int  inputIndex = 0u) const
inline

Definition at line 514 of file InferenceModel.hpp.

References Params::m_InputBindings.

Referenced by MainImpl().

515  {
516  CheckInputIndexIsValid(inputIndex);
517  return m_InputBindings[inputIndex].second.GetNumElements();
518  }

◆ GetOutputBindingInfo()

const armnn::BindingPointInfo& GetOutputBindingInfo ( unsigned int  outputIndex = 0u) const
inline

Definition at line 646 of file InferenceModel.hpp.

References Params::m_OutputBindings.

647  {
648  CheckOutputIndexIsValid(outputIndex);
649  return m_OutputBindings[outputIndex];
650  }

◆ GetOutputBindingInfos()

const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos ( ) const
inline

Definition at line 652 of file InferenceModel.hpp.

References Params::m_OutputBindings.

Referenced by MainImpl().

653  {
654  return m_OutputBindings;
655  }

◆ GetOutputSize()

unsigned int GetOutputSize ( unsigned int  outputIndex = 0u) const
inline

Definition at line 520 of file InferenceModel.hpp.

References Params::m_OutputBindings.

Referenced by main(), and MainImpl().

521  {
522  CheckOutputIndexIsValid(outputIndex);
523  return m_OutputBindings[outputIndex].second.GetNumElements();
524  }

◆ GetQuantizationParams()

QuantizationParams GetQuantizationParams ( unsigned int  outputIndex = 0u) const
inline

Definition at line 657 of file InferenceModel.hpp.

References Params::m_OutputBindings.

658  {
659  CheckOutputIndexIsValid(outputIndex);
660  return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
661  m_OutputBindings[outputIndex].second.GetQuantizationOffset());
662  }
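QuantizationParams is a (scale, offset) pair read from the bound output tensor's TensorInfo. A hedged sketch of dequantizing 8-bit output values with armnn::Dequantize from armnn/TypesUtils.hpp (the uint8 output type and sample values are assumptions):

    // Convert quantized uint8 output values back to float using the output's (scale, offset).
    auto outputQParams = model.GetQuantizationParams(0);
    float   scale  = outputQParams.first;
    int32_t offset = outputQParams.second;

    std::vector<unsigned char> rawOutput = { 0, 128, 255 };   // stands in for data an 8-bit output would hold

    std::vector<float> dequantized;
    dequantized.reserve(rawOutput.size());
    for (unsigned char value : rawOutput)
    {
        dequantized.push_back(armnn::Dequantize(value, scale, offset));
    }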

◆ Run()

std::chrono::duration<double, std::milli> Run ( const std::vector< TContainer > &  inputContainers,
std::vector< TContainer > &  outputContainers 
)
inline

Definition at line 526 of file InferenceModel.hpp.

References armnn::Failure, armnn::GetTimeDuration(), armnn::GetTimeNow(), MakeInputTensors(), MakeOutputTensors(), and armnn::numeric_cast().

Referenced by MainImpl().

529  {
530  for (unsigned int i = 0; i < outputContainers.size(); ++i)
531  {
532  const unsigned int expectedOutputDataSize = GetOutputSize(i);
533 
534  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
535  {
536  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
537  if (actualOutputDataSize < expectedOutputDataSize)
538  {
539  unsigned int outputIndex = i;
540  throw armnn::Exception(
541  fmt::format("Not enough data for output #{0}: expected "
542  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
543  }
544  },
545  outputContainers[i]);
546  }
547 
548  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
549  if (profiler)
550  {
551  profiler->EnableProfiling(m_EnableProfiling);
552  }
553 
554  // Start timer to record inference time in EnqueueWorkload (in milliseconds)
555  const auto start_time = armnn::GetTimeNow();
556 
557  armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
558  MakeInputTensors(inputContainers),
559  MakeOutputTensors(outputContainers));
560  const auto duration = armnn::GetTimeDuration(start_time);
561 
562  // if profiling is enabled print out the results
563  if (profiler && profiler->IsProfilingEnabled())
564  {
565  profiler->Print(std::cout);
566  }
567 
568  if (ret == armnn::Status::Failure)
569  {
570  throw armnn::Exception("IRuntime::EnqueueWorkload failed");
571  }
572  else
573  {
574  return duration;
575  }
576  }
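Putting the pieces together, a hedged sketch of one synchronous inference. inputs and outputs are containers prepared as shown under TContainer above, and Model/model are the same illustrative instantiation:

    // Fill the input, pre-size the output, run once and report the measured time.
    std::vector<Model::TContainer> inputs;
    inputs.push_back(std::vector<float>(model.GetInputSize(0), 0.0f));   // real input data goes here

    std::vector<Model::TContainer> outputs;
    outputs.push_back(std::vector<float>(model.GetOutputSize(0)));

    std::chrono::duration<double, std::milli> duration = model.Run(inputs, outputs);
    std::cout << "Inference took " << duration.count() << " ms" << std::endl;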

◆ RunAsync()

std::tuple<armnn::profiling::ProfilingGuid, std::chrono::duration<double, std::milli> > RunAsync ( armnn::experimental::IWorkingMemHandle &  workingMemHandleRef,
const std::vector< TContainer > &  inputContainers,
std::vector< TContainer > &  outputContainers 
)
inline

Definition at line 578 of file InferenceModel.hpp.

References armnn::Failure, IWorkingMemHandle::GetInferenceId(), armnn::GetTimeDuration(), armnn::GetTimeNow(), MakeInputTensors(), MakeOutputTensors(), and armnn::numeric_cast().

Referenced by MainImpl().

582  {
583  for (unsigned int i = 0; i < outputContainers.size(); ++i)
584  {
585  const unsigned int expectedOutputDataSize = GetOutputSize(i);
586 
587  mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
588  {
589  const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
590  if (actualOutputDataSize < expectedOutputDataSize)
591  {
592  unsigned int outputIndex = i;
593  throw armnn::Exception(
594  fmt::format("Not enough data for output #{0}: expected "
595  "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
596  }
597  },
598  outputContainers[i]);
599  }
600 
601  std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
602  if (profiler)
603  {
604  profiler->EnableProfiling(m_EnableProfiling);
605  }
606 
607  // Start timer to record inference time in EnqueueWorkload (in milliseconds)
608  const auto start_time = armnn::GetTimeNow();
609 
610  armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
611  MakeInputTensors(inputContainers),
612  MakeOutputTensors(outputContainers));
613  auto inferenceID = workingMemHandleRef.GetInferenceId();
614 
615  const auto duration = armnn::GetTimeDuration(start_time);
616 
617  // if profiling is enabled print out the results
618  if (profiler && profiler->IsProfilingEnabled())
619  {
620  profiler->Print(std::cout);
621  }
622 
623  if (ret == armnn::Status::Failure)
624  {
625  throw armnn::Exception(
626  fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
627  m_NetworkIdentifier, inferenceID));
628  }
629  else
630  {
631  return std::make_tuple(inferenceID, duration);
632  }
633  }
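When Params::m_AsyncEnabled was set before construction, the same containers can instead be dispatched through a working memory handle obtained from CreateWorkingMemHandle(). A hedged sketch reusing the inputs and outputs from the Run() sketch above:

    // Asynchronous-capable execution path: one working memory handle per concurrent inference.
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> workingMemHandle =
        model.CreateWorkingMemHandle();

    auto result = model.RunAsync(*workingMemHandle, inputs, outputs);

    armnn::profiling::ProfilingGuid inferenceId = std::get<0>(result);   // identifies this inference
    auto asyncDuration                          = std::get<1>(result);   // wall-clock time in milliseconds

    std::cout << "Async inference took " << asyncDuration.count() << " ms" << std::endl;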

The documentation for this class was generated from the following file:

InferenceModel.hpp