#include <common/include/ProfilingGuid.hpp>

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif

#include <cxxopts/cxxopts.hpp>
#include <fmt/format.h>
#include <mapbox/variant.hpp>

#include <type_traits>

inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        // A backend ID is invalid if it does not appear in the registry of loaded backends.
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                // Accumulate the invalid IDs into a comma-separated list for the caller.
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}
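// Example (illustrative sketch, not part of the original header): collecting
// the names of any unknown backends into a string for error reporting. The
// backend IDs used here are placeholders.
//
//     std::vector<armnn::BackendId> ids = { "CpuAcc", "NotABackend" };
//     std::string invalid;
//     if (!CheckRequestedBackendsAreValid(ids, armnn::Optional<std::string&>(invalid)))
//     {
//         ARMNN_LOG(fatal) << "Invalid backends: " << invalid;
//     }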
template <typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }
        }

        std::vector<std::string> requestedOutputs = params.m_OutputBindings;
        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            // Handle text and binary input differently by calling the corresponding parser function.
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            inputBindings.push_back(parser->GetNetworkInputBindingInfo(inputLayerName));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            outputBindings.push_back(parser->GetNetworkOutputBindingInfo(outputLayerName));
        }

        return network;
    }
};
#if defined(ARMNN_SERIALIZER)
template <>
struct CreateNetworkImpl<armnnDeserializer::IDeserializer>
{
public:
    using IParser = armnnDeserializer::IDeserializer;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        auto parser(IParser::Create());
        ARMNN_ASSERT(parser);

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");

            std::error_code errorCode;
            fs::path pathToFile(params.m_ModelPath);
            if (!fs::exists(pathToFile, errorCode))
            {
                // ... (throws if the model file cannot be found)
            }
            std::ifstream file(params.m_ModelPath, std::ios::binary);
            network = parser->CreateNetworkFromBinary(file);
        }

        unsigned int subgraphId = armnn::numeric_cast<unsigned int>(params.m_SubgraphId);

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnnDeserializer::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(subgraphId, inputLayerName);
            inputBindings.push_back(std::make_pair(inputBinding.m_BindingId, inputBinding.m_TensorInfo));
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnnDeserializer::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(subgraphId, outputLayerName);
            outputBindings.push_back(std::make_pair(outputBinding.m_BindingId, outputBinding.m_TensorInfo));
        }

        return network;
    }
};
#endif
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params  = InferenceModelInternal::Params;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<armnn::BindingPointInfo>& inputBindings,
                                     std::vector<armnn::BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        IParser::TfLiteParserOptions options;
        options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
        options.m_InferAndValidate           = params.m_InferOutputShape;
        auto parser(IParser::Create(options));

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            armnn::BindingPointInfo inputBinding =
                parser->GetNetworkInputBindingInfo(params.m_SubgraphId, inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            armnn::BindingPointInfo outputBinding =
                parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif
#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser          = armnnOnnxParser::IOnnxParser;
    using Params           = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     std::vector<BindingPointInfo>& inputBindings,
                                     std::vector<BindingPointInfo>& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk.
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (!params.m_InputShapes.empty())
        {
            const size_t numInputShapes   = params.m_InputShapes.size();
            const size_t numInputBindings = params.m_InputBindings.size();
            if (numInputShapes < numInputBindings)
            {
                throw armnn::Exception(fmt::format(
                    "Not every input has its tensor shape specified: expected={0}, got={1}",
                    numInputBindings, numInputShapes));
            }

            for (size_t i = 0; i < numInputShapes; i++)
            {
                inputShapes[params.m_InputBindings[i]] = params.m_InputShapes[i];
            }

            {
                ARMNN_SCOPED_HEAP_PROFILING("Parsing");
                network = (params.m_IsModelBinary ?
                    parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
                    parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes));
            }
        }
        else
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        for (const std::string& inputLayerName : params.m_InputBindings)
        {
            BindingPointInfo inputBinding = parser->GetNetworkInputBindingInfo(inputLayerName);
            inputBindings.push_back(inputBinding);
        }

        for (const std::string& outputLayerName : params.m_OutputBindings)
        {
            BindingPointInfo outputBinding = parser->GetNetworkOutputBindingInfo(outputLayerName);
            outputBindings.push_back(outputBinding);
        }

        return network;
    }
};
#endif
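// Example (illustrative sketch, not part of the original header): the parser
// type is selected at compile time through the template argument. The field
// names follow the Params struct used throughout this file; the model path
// and layer names are placeholders.
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath      = "model.onnx";
//     params.m_InputBindings  = { "input" };
//     params.m_OutputBindings = { "output" };
//
//     std::vector<armnn::BindingPointInfo> inputBindings;
//     std::vector<armnn::BindingPointInfo> outputBindings;
//     armnn::INetworkPtr network =
//         CreateNetworkImpl<armnnOnnxParser::IOnnxParser>::Create(params, inputBindings, outputBindings);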
template <typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType           = TDataType;
    using Params             = InferenceModelInternal::Params;
    using QuantizationParams = InferenceModelInternal::QuantizationParams;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<std::string> m_ComputeDevices;
        std::string m_DynamicBackendsPath;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
        bool m_EnableBf16TurboMode;
        std::string m_Labels;

        std::vector<armnn::BackendId> GetComputeDevicesAsBackendIds()
        {
            std::vector<armnn::BackendId> backendIds;
            std::copy(m_ComputeDevices.begin(), m_ComputeDevices.end(), std::back_inserter(backendIds));
            return backendIds;
        }
    };

    static void AddCommandLineOptions(cxxopts::Options& options,
                                      CommandLineOptions& cLineOptions,
                                      std::vector<std::string>& required)
    {
        const std::vector<std::string> defaultComputes = { "CpuAcc", "CpuRef" };

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        options
            .allow_unrecognised_options()
            .add_options()
                ("m,model-dir", "Path to directory containing model files (.prototxt/.tflite)",
                 cxxopts::value<std::string>(cLineOptions.m_ModelDir))
                ("c,compute", backendsMessage.c_str(),
                 cxxopts::value<std::vector<std::string>>(cLineOptions.m_ComputeDevices)->default_value("CpuRef"))
                ("b,dynamic-backends-path",
                 "Path where to load any available dynamic backend from. "
                 "If left empty (the default), dynamic backends will not be used.",
                 cxxopts::value<std::string>(cLineOptions.m_DynamicBackendsPath))
                ("l,labels",
                 "Text file containing one image filename - correct label pair per line, "
                 "used to test the accuracy of the network.",
                 cxxopts::value<std::string>(cLineOptions.m_Labels))
                ("v,visualize-optimized-model",
                 "Produce a dot file useful for visualizing the graph post optimization. "
                 "The file will have the same name as the model with the .dot extension.",
                 cxxopts::value<bool>(cLineOptions.m_VisualizePostOptimizationModel)->default_value("false"))
                ("fp16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to FP16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableFp16TurboMode)->default_value("false"))
                ("bf16-turbo-mode",
                 "If this option is enabled FP32 layers, weights and biases will be converted "
                 "to BF16 where the backend supports it.",
                 cxxopts::value<bool>(cLineOptions.m_EnableBf16TurboMode)->default_value("false"));

        required.emplace_back("model-dir");
    }
    InferenceModel(const Params& params,
                   bool enableProfiling,
                   const std::string& dynamicBackendsPath,
                   const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(enableProfiling),
          m_ProfilingDetailsMethod(armnn::ProfilingDetailsMethod::Undefined),
          m_DynamicBackendsPath(dynamicBackendsPath)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling  = m_EnableProfiling;
            options.m_DynamicBackendsPath = m_DynamicBackendsPath;
            m_Runtime = armnn::IRuntime::Create(options);
        }

        // Reject the request early if any of the requested backends is unknown.
        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevices, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        const auto parsing_start_time = armnn::GetTimeNow();
        armnn::INetworkPtr network = CreateNetworkImpl<IParser>::Create(params, m_InputBindings, m_OutputBindings);

        ARMNN_LOG(info) << "Network parsing time: " << std::setprecision(2)
                        << std::fixed << armnn::GetTimeDuration(parsing_start_time).count() << " ms.";

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;
            options.m_ReduceFp32ToBf16 = params.m_EnableBf16TurboMode;
            // ... (remaining optimizer options elided)

            const auto optimization_start_time = armnn::GetTimeNow();
            optNet = armnn::Optimize(*network, params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

            ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms.";

            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            fs::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::ios_base::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");

            const auto loading_start_time = armnn::GetTimeNow();
            armnn::INetworkProperties networkProperties(params.m_AsyncEnabled,
                                                        armnn::MemorySource::Undefined,
                                                        armnn::MemorySource::Undefined,
                                                        enableProfiling,
                                                        m_ProfilingDetailsMethod);
            std::string errorMessage;
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet), errorMessage, networkProperties);

            ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                            << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";

            // When asynchronous execution is requested, pre-create one working memory
            // handle per thread so the thread pool can run inferences concurrently.
            if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
            {
                std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
                for (size_t i = 0; i < params.m_ThreadPoolSize; ++i)
                {
                    memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier));
                }
                m_Threadpool = std::make_unique<armnn::Threadpool>(params.m_ThreadPoolSize,
                                                                   m_Runtime.get(),
                                                                   memHandles);
            }
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }
    void CheckInputIndexIsValid(unsigned int inputIndex) const
    {
        if (m_InputBindings.size() < inputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Input index out of range: {}", inputIndex));
        }
    }

    void CheckOutputIndexIsValid(unsigned int outputIndex) const
    {
        if (m_OutputBindings.size() < outputIndex + 1)
        {
            throw armnn::Exception(fmt::format("Output index out of range: {}", outputIndex));
        }
    }

    unsigned int GetInputSize(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex].second.GetNumElements();
    }

    unsigned int GetOutputSize(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex].second.GetNumElements();
    }
    std::chrono::duration<double, std::milli> Run(
            const std::vector<armnnUtils::TContainer>& inputContainers,
            std::vector<armnnUtils::TContainer>& outputContainers)
    {
        // Validate that every output container is large enough for its binding.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}",
                                        outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Start a timer to record the inference time of EnqueueWorkload (in milliseconds).
        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret;
        if (m_ImportInputsIfAligned)
        {
            std::vector<armnn::ImportedInputId> importedInputIds = m_Runtime->ImportInputs(
                m_NetworkIdentifier, MakeInputTensors(inputContainers), armnn::MemorySource::Malloc);

            std::vector<armnn::ImportedOutputId> importedOutputIds = m_Runtime->ImportOutputs(
                m_NetworkIdentifier, MakeOutputTensors(outputContainers), armnn::MemorySource::Malloc);

            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers),
                                             importedInputIds,
                                             importedOutputIds);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                             MakeInputTensors(inputContainers),
                                             MakeOutputTensors(outputContainers));
        }

        const auto duration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }

        return duration;
    }
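    // Example (illustrative sketch, not part of the original header): running
    // a single synchronous inference with float data. The output buffer is
    // pre-sized from the model's own bindings; "model" is a placeholder.
    //
    //     std::vector<armnnUtils::TContainer> inputs  = { std::vector<float>(model.GetInputSize()) };
    //     std::vector<armnnUtils::TContainer> outputs = { std::vector<float>(model.GetOutputSize()) };
    //     auto inferenceTime = model.Run(inputs, outputs);
    //     ARMNN_LOG(info) << "Inference time: " << inferenceTime.count() << " ms";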
    std::tuple<unsigned int, std::chrono::duration<double, std::milli>> RunAsync(
        armnn::experimental::IWorkingMemHandle& workingMemHandleRef,
        const std::vector<armnnUtils::TContainer>& inputContainers,
        std::vector<armnnUtils::TContainer>& outputContainers,
        unsigned int inferenceID)
    {
        // Validate that every output container is large enough for its binding.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}",
                                        outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        const auto start_time = armnn::GetTimeNow();

        armnn::Status ret = m_Runtime->Execute(workingMemHandleRef,
                                               MakeInputTensors(inputContainers),
                                               MakeOutputTensors(outputContainers));

        const auto duration = armnn::GetTimeDuration(start_time);

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception(
                fmt::format("IRuntime::Execute asynchronously failed for network #{0} on inference #{1}",
                            m_NetworkIdentifier, inferenceID));
        }

        return std::make_tuple(inferenceID, duration);
    }
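    // Example (illustrative sketch, not part of the original header): each
    // concurrent caller gets its own working memory handle for this overload;
    // "model", "inputs" and "outputs" are placeholders.
    //
    //     auto memHandle = model.CreateWorkingMemHandle();
    //     auto [inferenceId, duration] = model.RunAsync(*memHandle, inputs, outputs, /*inferenceID=*/0);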
    void RunAsync(const std::vector<armnnUtils::TContainer>& inputContainers,
                  std::vector<armnnUtils::TContainer>& outputContainers,
                  std::shared_ptr<armnn::IAsyncExecutionCallback> cb)
    {
        // Validate that every output container is large enough for its binding.
        for (unsigned int i = 0; i < outputContainers.size(); ++i)
        {
            const unsigned int expectedOutputDataSize = GetOutputSize(i);

            mapbox::util::apply_visitor([expectedOutputDataSize, i](auto&& value)
            {
                const unsigned int actualOutputDataSize = armnn::numeric_cast<unsigned int>(value.size());
                if (actualOutputDataSize < expectedOutputDataSize)
                {
                    unsigned int outputIndex = i;
                    throw armnn::Exception(
                            fmt::format("Not enough data for output #{0}: expected "
                                        "{1} elements, got {2}",
                                        outputIndex, expectedOutputDataSize, actualOutputDataSize));
                }
            },
            outputContainers[i]);
        }

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);

        // Hand the inference over to the thread pool; cb is invoked on completion.
        m_Threadpool->Schedule(m_NetworkIdentifier,
                               MakeInputTensors(inputContainers),
                               MakeOutputTensors(outputContainers),
                               armnn::QosExecPriority::Medium,
                               cb);

        // If profiling is enabled, print out the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }
    }
    const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return m_InputBindings[inputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetInputBindingInfos() const
    {
        return m_InputBindings;
    }

    const armnn::BindingPointInfo& GetOutputBindingInfo(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return m_OutputBindings[outputIndex];
    }

    const std::vector<armnn::BindingPointInfo>& GetOutputBindingInfos() const
    {
        return m_OutputBindings;
    }

    QuantizationParams GetQuantizationParams(unsigned int outputIndex = 0u) const
    {
        CheckOutputIndexIsValid(outputIndex);
        return std::make_pair(m_OutputBindings[outputIndex].second.GetQuantizationScale(),
                              m_OutputBindings[outputIndex].second.GetQuantizationOffset());
    }

    QuantizationParams GetInputQuantizationParams(unsigned int inputIndex = 0u) const
    {
        CheckInputIndexIsValid(inputIndex);
        return std::make_pair(m_InputBindings[inputIndex].second.GetQuantizationScale(),
                              m_InputBindings[inputIndex].second.GetQuantizationOffset());
    }

    std::vector<QuantizationParams> GetAllQuantizationParams() const
    {
        std::vector<QuantizationParams> quantizationParams;
        for (unsigned int i = 0u; i < m_OutputBindings.size(); i++)
        {
            quantizationParams.push_back(GetQuantizationParams(i));
        }
        return quantizationParams;
    }
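    // Example (illustrative sketch, not part of the original header):
    // quantizing a float input value with the model's input quantization
    // parameters. armnn::Quantize is assumed to be available from armnn's
    // TypesUtils; "model" is a placeholder.
    //
    //     auto [scale, offset] = model.GetInputQuantizationParams();
    //     uint8_t q = armnn::Quantize<uint8_t>(0.5f, scale, offset);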
    std::unique_ptr<armnn::experimental::IWorkingMemHandle> CreateWorkingMemHandle()
    {
        return m_Runtime->CreateWorkingMemHandle(m_NetworkIdentifier);
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;
    std::unique_ptr<armnn::Threadpool> m_Threadpool;

    std::vector<armnn::BindingPointInfo> m_InputBindings;
    std::vector<armnn::BindingPointInfo> m_OutputBindings;
    bool m_EnableProfiling;
    armnn::ProfilingDetailsMethod m_ProfilingDetailsMethod;
    std::string m_DynamicBackendsPath;
    bool m_ImportInputsIfAligned;
    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const std::vector<TContainer>& inputDataContainers)
    {
        return armnnUtils::MakeInputTensors(m_InputBindings, inputDataContainers);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(std::vector<TContainer>& outputDataContainers)
    {
        return armnnUtils::MakeOutputTensors(m_OutputBindings, outputDataContainers);
    }
};