#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <cxxopts/cxxopts.hpp>
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <type_traits>

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}
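// Usage sketch (illustrative, not part of the original header): validate a
// user-supplied backend list before building a model, collecting the rejected
// ids for the error message. The backend ids below are examples; any id known
// to the BackendRegistry passes the check.
inline void ExampleCheckBackends()
{
    std::string invalidBackends;
    const std::vector<armnn::BackendId> requested = { "CpuAcc", "CpuRef" };
    if (!CheckRequestedBackendsAreValid(requested, armnn::Optional<std::string&>(invalidBackends)))
    {
        ARMNN_LOG(fatal) << "Invalid backend IDs: " << invalidBackends;
    }
}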
// The InferenceModelInternal::Params struct (defined earlier in this header,
// elided here) carries the model path, the input/output binding names,
// optional input shapes and the execution flags used below.

template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        const size_t numInputShapes   = params.m_InputShapes.size();
        const size_t numInputBindings = params.m_InputBindings.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(fmt::format(
                "Not every input has its tensor shape specified: expected={0}, got={1}",
                numInputBindings, numInputShapes));
        }
        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;

        // Handle text and binary input differently by calling the corresponding parser function.
        armnn::INetworkPtr network = (params.m_IsModelBinary ?
            parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
            parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }
        return network;
    }
};
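// Illustrative helper (not part of the original header): shows how the Params
// consumed by CreateNetworkImpl<IParser>::Create are typically filled in. The
// path, layer names and shape below are made-up placeholders.
template <typename IParser>
armnn::INetworkPtr ExampleParse(std::vector<armnn::BindingPointInfo>& inputBindings,
                                std::vector<armnn::BindingPointInfo>& outputBindings)
{
    InferenceModelInternal::Params params;
    params.m_ModelPath      = "model.prototxt";                          // placeholder path
    params.m_IsModelBinary  = false;                                     // parse the text format
    params.m_InputBindings  = { "data" };                                // placeholder input layer
    params.m_InputShapes    = { armnn::TensorShape({ 1, 3, 224, 224 }) };
    params.m_OutputBindings = { "prob" };                                // placeholder output layer
    return CreateNetworkImpl<IParser>::Create(params, inputBindings, outputBindings);
}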
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());

        // Fail early if the serialized model is missing on disk.
        std::error_code errorCode;
        fs::path pathToFile(params.m_ModelPath);
        if (!fs::exists(pathToFile, errorCode))
        {
            throw armnn::FileNotFoundException(fmt::format("Cannot find the file ({0}) errorCode: {1}",
                                                           params.m_ModelPath, errorCode.message()));
        }
        std::ifstream file(params.m_ModelPath, std::ios::binary);
        armnn::INetworkPtr network = parser->CreateNetworkFromBinary(file);

        const unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);
        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }
        return network;
    }
};
#endif
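// Usage sketch (illustrative): loading a previously serialized .armnn model
// through the specialization above. Only a binary model path and the binding
// names are needed; the values below are placeholders.
#if defined(ARMNN_SERIALIZER)
inline armnn::INetworkPtr ExampleDeserialize(std::vector<armnn::BindingPointInfo>& inputBindings,
                                             std::vector<armnn::BindingPointInfo>& outputBindings)
{
    InferenceModelInternal::Params params;
    params.m_ModelPath      = "model.armnn"; // placeholder path
    params.m_InputBindings  = { "input" };   // placeholder binding names
    params.m_OutputBindings = { "output" };
    return CreateNetworkImpl<armnnDeserializer::IDeserializer>::Create(params, inputBindings, outputBindings);
}
#endif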
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }
        return network;
    }
};
#endif
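// Usage sketch (illustrative): the Params fields that feed TfLiteParserOptions
// above. The field names follow the assignments in the specialization; the
// values here are examples only.
#if defined(ARMNN_TF_LITE_PARSER)
inline void ExampleTfLiteOptions(InferenceModelInternal::Params& params)
{
    params.m_ModelPath        = "model.tflite"; // placeholder path
    params.m_SubgraphId       = 0;              // a .tflite file may hold several subgraphs
    params.m_ParseUnsupported = true;           // replace unsupported operators with stand-in layers
    params.m_InferOutputShape = true;           // infer and validate tensor shapes while parsing
}
#endif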
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*) {}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }
            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            // Parse with fixed input shapes when they have been specified...
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
        }
        else
        {
            // ...otherwise let the parser work the shapes out itself.
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }
        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }
        return network;
    }
};
#endif
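// Usage sketch (illustrative): ONNX models with dynamic input dimensions can
// be pinned via m_InputShapes, which selects the fixed-shape parser overloads
// above. Path and tensor name are placeholders.
#if defined(ARMNN_ONNX_PARSER)
inline void ExampleOnnxFixedShape(InferenceModelInternal::Params& params)
{
    params.m_ModelPath     = "model.onnx"; // placeholder path
    params.m_IsModelBinary = true;         // .onnx files are protobuf binaries
    params.m_InputBindings = { "input" };  // placeholder tensor name
    params.m_InputShapes   = { armnn::TensorShape({ 1, 3, 224, 224 }) };
}
#endif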
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };
    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path from which to load any available dynamic backends. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value<std::string>(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.",
                 cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel))
                ("fp16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode))
                ("bf16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode));

        required.emplace_back("model-dir");
    }
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling)
        , m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined)
        , m_DynamicBackendsPath(dynamicBackendsPath)
    {
        // ... (use the supplied runtime if any, otherwise create one via armnn::IRuntime::Create)

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        const auto parsing_start_time = armnn::GetTimeNow();
        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*) {}};
        {
            const auto optimization_start_time = armnn::GetTimeNow();
            // ... (armnn::Optimize is called here with the requested backends and optimizer options)
            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(/* ... */ m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";
        }

        // When asynchronous execution with an internal thread pool is requested,
        // pre-create one working memory handle per thread.
        std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
        for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
        {
            memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
        }
        // ... (the handles are handed to an armnn::Threadpool stored in m_Threadpool)
    }
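    // Usage sketch (illustrative; the parser type and option values are examples):
    //
    //     using TfLiteModel = InferenceModel<armnnTfLiteParser::ITfLiteParser, float>;
    //     TfLiteModel::Params params;
    //     params.m_ModelPath      = "model.tflite";
    //     params.m_ComputeDevices = { "CpuRef" };
    //     TfLiteModel model(params, /*enableProfiling=*/false, /*dynamicBackendsPath=*/"");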
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }
    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        // Check that each output container is large enough for the tensor it will receive.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Time the EnqueueWorkload call itself (in milliseconds).
        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(inputContainers),
                                                       MakeOutputTensors(outputContainers));
        const auto duration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
        return duration;
    }
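    // Usage sketch (illustrative): a float input/output pair wrapped in the
    // mapbox-variant-based TContainer; element counts come from the model.
    //
    //     std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
    //     std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
    //     auto inferenceMs = model.Run(inputs, outputs);
    //     ARMNN_LOG(info) << "Inference took " << inferenceMs.count() << " ms";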
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
            armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers,
            unsigned int inferenceID)
    {
        // Validate the output containers exactly as in Run() above.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        const auto start_time = armnn::GetTimeNow();
        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));
        const auto duration = armnn::GetTimeDuration(start_time);

        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }
        return std::make_tuple(inferenceID, duration);
    }
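    // Usage sketch (illustrative): each concurrent caller owns one working
    // memory handle, and the inference id is a caller-chosen tag.
    //
    //     auto memHandle = model.CreateWorkingMemHandle();
    //     auto result    = model.RunAsync(*memHandle, inputs, outputs, /*inferenceID=*/0);
    //     ARMNN_LOG(info) << "Inference #" << std::get<0>(result) << " took "
    //                     << std::get<1>(result).count() << " ms";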
    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        // Validate the output containers exactly as in Run() above.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                        fmt::format("Not enough data for output #{0}: expected "
                                    "{1} elements, got {2}", outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Hand the inference to the internal thread pool; completion is reported via cb.
        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               /* ... */
                               cb);

        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }
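    // Usage sketch (illustrative): with the thread pool, completion is delivered
    // through the caller-provided callback object rather than a return value;
    // `myCallback` stands for whatever IAsyncExecutionCallback implementation
    // the caller supplies.
    //
    //     std::shared_ptr<armnn::IAsyncExecutionCallback> cb = myCallback;
    //     model.RunAsync(inputs, outputs, cb);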
    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }

    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }
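    // Usage sketch (illustrative): converting a float value into the model's
    // quantized input space with armnn::Quantize (declared in armnn/TypesUtils.hpp).
    //
    //     InferenceModelInternal::QuantizationParams qp = model.GetInputQuantizationParams();
    //     uint8_t quantized = armnn::Quantize<uint8_t>(0.5f, qp.first, qp.second);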
private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};