#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
// ...
#endif
#if defined(ARMNN_TF_LITE_PARSER)
// ...
#endif
#if defined(ARMNN_ONNX_PARSER)
// ...
#endif

#include <cxxopts/cxxopts.hpp>
#include <fmt/format.h>
#include <mapbox/variant.hpp>
// ...
#include <type_traits>

// ...

template <typename IParser>
struct CreateNetworkImpl
{
public:
    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        // ...
        auto parser(IParser::Create());
        // ...
        std::map<std::string, armnn::TensorShape> inputShapes;
        // ...
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(fmt::format(
                "Not every input has its tensor shape specified: expected={0}, got={1}",
                numInputBindings, numInputShapes));
        }
        // ...
        for (size_t i = 0; i < numInputShapes; i++)
        {
            // ...
        }
        // ...
        network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
            parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        // ...
        inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        // ...
        outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        // ...
        return network;
    }
};
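// Usage sketch (illustrative, values are hypothetical): a caller fills in the
// Params struct and receives the parsed network together with its I/O bindings.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.tflite";   // hypothetical path
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//     std::vector<armnn::BindingPointInfo> inputBindings;
//     std::vector<armnn::BindingPointInfo> outputBindings;
//     armnn::INetworkPtr network = CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(
//         params, inputBindings, outputBindings);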
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    // ...
    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        // ...
        std::error_code errorCode;
        // ...
        if (!fs::exists(pathToFile, errorCode))
        {
            // ...
        }
        std::ifstream file(params.m_ModelPath, std::ios::binary);
        // ...
        network = parser->CreateNetworkFromBinary(file);
        // ...
        auto inputBinding =
            parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
        // ...
        auto outputBinding =
            parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
        // ...
        return network;
    }
};
#endif // ARMNN_SERIALIZER
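// Usage sketch (illustrative): unlike the text-capable parsers, the
// deserializer only accepts the binary .armnn format, and binding lookups
// are scoped to a subgraph id:
//
//     std::ifstream file("model.armnn", std::ios::binary);  // hypothetical path
//     auto parser = armnnDeserializer::IDeserializer::Create();
//     armnn::INetworkPtr network = parser->CreateNetworkFromBinary(file);
//     auto inputInfo = parser->GetNetworkInputBindingInfo(0u, "input");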
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    // ...
    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        // ...
        IParser::TfLiteParserOptions options;
        // ...
        auto parser(IParser::Create(options));
        // ...
        network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        // ...
        auto inputBinding =
            parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
        inputBindings.push_back(inputBinding);
        // ...
        auto outputBinding =
            parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
        outputBindings.push_back(outputBinding);
        // ...
        return network;
    }
};
#endif // ARMNN_TF_LITE_PARSER
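// Sketch of the options setup elided above (field names are assumptions based
// on armnnTfLiteParser, not shown in this listing):
//
//     IParser::TfLiteParserOptions options;
//     options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;  // assumed field
//     options.m_InferAndValidate           = params.m_InferOutputShape;  // assumed field
//     auto parser(IParser::Create(options));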
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    // ...
    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        // ...
        auto parser(IParser::Create());
        // ...
        std::map<std::string, armnn::TensorShape> inputShapes;
        // ...
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(fmt::format(
                "Not every input has its tensor shape specified: expected={0}, got={1}",
                numInputBindings, numInputShapes));
        }
        // ...
        for (size_t i = 0; i < numInputShapes; i++)
        {
            // ...
        }
        // ...
        network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
            parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
        // ...
        network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
            parser->CreateNetworkFromTextFile(modelPath.c_str()));
        // ...
        BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
        inputBindings.push_back(inputBinding);
        // ...
        BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
        outputBindings.push_back(outputBinding);
        // ...
        return network;
    }
};
#endif // ARMNN_ONNX_PARSER
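// Usage sketch (illustrative): the inputShapes map pins down inputs whose
// dimensions are not fixed in the ONNX model; with an empty map the parser
// falls back to the shapes stored in the file (the second branch above):
//
//     std::map<std::string, armnn::TensorShape> inputShapes;
//     inputShapes["input"] = armnn::TensorShape({ 1, 3, 224, 224 });  // hypothetical shape
//     auto parser = armnnOnnxParser::IOnnxParser::Create();
//     armnn::INetworkPtr network =
//         parser->CreateNetworkFromBinaryFile("model.onnx", inputShapes);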
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    // ...
    struct CommandLineOptions
    {
        // ...
        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path where to load any available dynamic backend from. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value<std::string>(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.",
                 cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          // ...
    {
        // ...
        std::string invalidBackends;
        // ...
        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                        /* ... */;
        // ...
        ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                        /* ... */;
        // ...
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
        // ...
        armnn::INetworkProperties networkProperties(/* ... */ m_ProfilingDetailsMethod);
        std::string errorMessage;
        ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);
        // ...
        ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                        /* ... */;
        // ...
        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
        // ...
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
        // ...
    }
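// Usage sketch (illustrative): a shared runtime lets several models reuse one
// backend context; if none is passed, the constructor creates its own from
// armnn::IRuntime::CreationOptions:
//
//     armnn::IRuntime::CreationOptions runtimeOptions;
//     std::shared_ptr<armnn::IRuntime> runtime = armnn::IRuntime::Create(runtimeOptions);
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(
//         params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"", runtime);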
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (/* ... */)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (/* ... */)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        // ...
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        // ...
    }
    std::chrono::duration<double, std::milli> Run(
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}",
                                    outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        // ...
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(/* ... */);
            // ...
            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(/* ... */);
            // ...
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier, /* ... */);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier, /* ... */);
        }
        // ...
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
        // ...
    }
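// Usage sketch (illustrative): armnnUtils::TContainer is a mapbox variant over
// typed std::vectors, so callers pass pre-sized vectors and read the filled
// outputs back out of the variant:
//
//     std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize())  };
//     std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
//     auto inferenceTime = model.Run(inputs, outputs);  // duration in milliseconds
//     const auto& result = mapbox::util::get<std::vector<float>>(outputs[0]);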
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}",
                                    outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        // ...
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (/* ... */)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }

        return std::make_tuple(inferenceID, duration);
    }
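// Usage sketch (illustrative): each concurrent invocation needs its own
// working memory handle, typically one per worker thread:
//
//     auto memHandle = model.CreateWorkingMemHandle();
//     auto [id, duration] = model.RunAsync(*memHandle, inputs, outputs, /*inferenceID=*/0);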
    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}",
                                    outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        m_Threadpool->Schedule(m_NetworkIdentifier, /* ... */);
        // ...
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }
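// Usage sketch (illustrative): this overload schedules the inference on the
// internal armnn::Threadpool (created by the constructor when a thread pool
// size was requested) and reports completion through the caller's callback
// rather than a return value:
//
//     std::shared_ptr<armnn::IAsyncExecutionCallback> cb = /* caller-provided callback */;
//     model.RunAsync(inputs, outputs, cb);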
    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        // ...
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        // ...
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }
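// Worked example (illustrative): QuantizationParams is a (scale, offset) pair
// and a quantized value q dequantizes as real = scale * (q - offset). With
// scale = 0.5 and offset = 10, a raw output value of 14 maps to
// 0.5 * (14 - 10) = 2.0:
//
//     auto [scale, offset] = model.GetQuantizationParams(0);
//     float real = scale * static_cast<float>(rawValue - offset);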
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    // ...
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;
    // ...
    bool m_EnableProfiling;
    // ...

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return ::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return ::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};