using namespace armnn;
auto parser = CreateParser();
auto network = parser->CreateNetwork(m_Params);
auto optNet = OptimizeNetwork(network.get());

m_IOInfo = GetIOInfo(optNet.get());

SetupInputsAndOutputs();
armnn::INetworkProperties networkProperties{m_Params.m_Concurrent,
                                            MemorySource::Undefined,
                                            MemorySource::Undefined,
                                            enableProfiling,
                                            profilingDetailsMethod};

std::string errorMsg;
m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
if (m_Params.m_Iterations > 1)
{
    std::stringstream msg;
    msg << "Network will be executed " << m_Params.m_Iterations;
    if (m_Params.m_Concurrent)
    {
        msg << " times in an asynchronous manner. ";
    }
    else
    {
        msg << " times successively. ";
    }
    msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
           "cover each execution.";
    ARMNN_LOG(info) << msg.str();
}
if (m_Params.m_GenerateTensorData)
{
    ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
}

if (m_Params.m_DontPrintOutputs)
{
    ARMNN_LOG(info) << "Printing outputs to console is disabled.";
}
void ArmNNExecutor::ExecuteAsync()
{
    std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
    std::unique_ptr<armnn::Threadpool> threadpool;
    armnn::AsyncCallbackManager callbackManager;
    std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;

    for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
    {
        memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
    }

    threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize, m_Runtime.get(), memHandles);

    ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
    std::chrono::high_resolution_clock::time_point earliestStartTime =
        std::chrono::high_resolution_clock::time_point::max();
    std::chrono::high_resolution_clock::time_point latestEndTime =
        std::chrono::high_resolution_clock::now();
    std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

    for (size_t i = 0; i < m_Params.m_Iterations; ++i)
    {
        std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
        inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
        threadpool->Schedule(m_NetworkId,
                             m_InputTensorsVec[i],
                             m_OutputTensorsVec[i],
                             armnn::QosExecPriority::Medium,
                             cb);
    }
    // Collect the results as each scheduled inference is notified complete.
    for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
    {
        auto cb = callbackManager.GetNotifiedCallback();

        if (earliestStartTime > cb->GetStartTime())
        {
            earliestStartTime = cb->GetStartTime();
        }
        if (latestEndTime < cb->GetEndTime())
        {
            latestEndTime = cb->GetEndTime();
        }

        auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
        auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
        auto inferenceDuration = endTime - startTime;

        if (!m_Params.m_DontPrintOutputs)
        {
            const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
            PrintOutputTensors(out, iteration);
        }
    }
    // Overall wall-clock time: latest end minus earliest start.
    auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
    auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
    auto totalInferenceDuration = overallEndTime - overallStartTime;
    ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                    << std::fixed << totalInferenceDuration.count() << " ms\n";
}
void ArmNNExecutor::ExecuteSync()
{
    for (size_t x = 0; x < m_Params.m_Iterations; x++)
    {
        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);

        armnn::Status ret;
        if (m_Params.m_ImportInputsIfAligned)
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x],
                                             m_ImportedInputIds[x],
                                             m_ImportedOutputIds[x]);
        }
        else
        {
            ret = m_Runtime->EnqueueWorkload(m_NetworkId,
                                             m_InputTensorsVec[x],
                                             m_OutputTensorsVec[x]);
        }

        // If profiling is enabled, print the results.
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (!m_Params.m_DontPrintOutputs)
        {
            PrintOutputTensors(&m_OutputTensorsVec[x], x);
        }
    }
}
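For comparison, a minimal synchronous run against the same EnqueueWorkload API, assuming a one-input, one-output network already loaded as networkId. This is a sketch, not the tool's full path; the SetConstant call is needed on newer Arm NN versions before wrapping input memory in a ConstTensor:

#include <armnn/ArmNN.hpp>
#include <vector>

armnn::Status RunOnce(armnn::IRuntime* runtime, armnn::NetworkId networkId,
                      std::vector<float>& in, std::vector<float>& out)
{
    armnn::TensorInfo inputInfo = runtime->GetInputTensorInfo(networkId, 0);
    inputInfo.SetConstant(true); // newer Arm NN requires const input infos to be flagged

    armnn::InputTensors inputTensors{
        {0, armnn::ConstTensor(inputInfo, in.data())}};
    armnn::OutputTensors outputTensors{
        {0, armnn::Tensor(runtime->GetOutputTensorInfo(networkId, 0), out.data())}};

    return runtime->EnqueueWorkload(networkId, inputTensors, outputTensors);
}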
std::vector<const void*> ArmNNExecutor::Execute()
{
    if (m_Params.m_ThreadPoolSize == 0)
    {
        ExecuteSync();
    }
    else
    {
        ExecuteAsync();
    }

    std::vector<const void*> results;
    for (auto& output : m_OutputStorage)
    {
        results.push_back(output.m_Mem);
    }
    return results;
}
void ArmNNExecutor::PrintNetworkInfo()
{
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    std::stringstream ss;
    ss << "===== Network Info =====\n";
    ss << "Inputs in order:\n";
    for (const auto& inputName : inputNames)
    {
        const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
        ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
        if (inputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
            if (inputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : inputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    ss << "Outputs in order:\n";
    for (const auto& outputName : m_IOInfo.m_OutputNames)
    {
        const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
        ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
        if (outputInfo.IsQuantized())
        {
            ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
            if (outputInfo.HasMultipleQuantizationScales())
            {
                ss << " Quantization scales: ";
                for (const auto scale : outputInfo.GetQuantizationScales())
                {
                    ss << scale << ", ";
                }
            }
            else
            {
                ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
            }
        }
        ss << "\n";
    }

    std::cout << ss.str() << std::endl;
}
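The quantization queries used above can be exercised on a TensorInfo in isolation. A small sketch distinguishing per-axis from per-tensor quantization with the same calls:

#include <armnn/Tensor.hpp>
#include <iostream>

void DescribeQuantization(const armnn::TensorInfo& info)
{
    if (!info.IsQuantized())
    {
        std::cout << "not quantized\n";
        return;
    }
    if (info.HasMultipleQuantizationScales())
    {
        // Per-axis quantization: one scale per slice along the quantization dimension.
        for (float scale : info.GetQuantizationScales())
        {
            std::cout << scale << ' ';
        }
        std::cout << '\n';
    }
    else
    {
        // Per-tensor quantization: a single scale/offset pair.
        std::cout << "scale " << info.GetQuantizationScale()
                  << " offset " << info.GetQuantizationOffset() << '\n';
    }
}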
void ArmNNExecutor::SetupInputsAndOutputs()
{
    const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();

    if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
    {
        LogAndThrow("Number of input names does not match number of inputs");
    }

    const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
    const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
                                                 m_Params.m_InputNames :
                                                 m_IOInfo.m_InputNames;
    unsigned int noInputSets = 1;

    if (inputFilePaths != 0)
    {
        if (inputFilePaths % noOfInputs != 0)
        {
            LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
                        " not compatible with number of inputs: " + std::to_string(noOfInputs));
        }
        noInputSets = inputFilePaths / noOfInputs;
        if (noInputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
        }
    }

    const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
    const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
    unsigned int noOutputSets = 1;

    if (outputFilePaths != 0)
    {
        if (outputFilePaths % noOfOutputs != 0)
        {
            LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
                        ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
        }
        noOutputSets = outputFilePaths / noOfOutputs;
        if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
        {
            LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
        }
    }

    ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
                    << "for each input. The user provided " << inputFilePaths
                    << " input-tensor-data file/s which will be used to fill the input/s.\n";
    unsigned int inputCount = 0;
    for (unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
    {
        armnn::InputTensors inputTensors;
        for (const auto& inputName : inputNames)
        {
            armnn::BindingPointInfo bindingPointInfo;
            try
            {
                bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
            }
            catch (const std::out_of_range& e)
            {
                LogAndThrow("Input with inputName: " + inputName + " not found.");
            }

            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
            // ConstTensor requires a TensorInfo that is flagged constant.
            armnn::TensorInfo newInfo(tensorInfo);
            newInfo.SetConstant(true);

            m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});

            const int bindingId = bindingPointInfo.first;
            inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});

            const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
                armnn::EmptyOptional() :
                armnn::MakeOptional<std::string>(m_Params.m_InputTensorDataFilePaths.at(inputCount++));

            switch (tensorInfo.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmS8:
                {
                    auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                case armnn::DataType::Signed32:
                {
                    auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
                    PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }
        }

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedInputIds.push_back(
                m_Runtime->ImportInputs(m_NetworkId, inputTensors, armnn::MemorySource::Malloc));
        }
        m_InputTensorsVec.emplace_back(inputTensors);
    }
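Each branch of the DataType switch above follows one pattern: reinterpret the raw storage as the element type, then fill it element by element. A reduced sketch with a hypothetical Fill helper:

#include <cstdint>
#include <cstdlib>

template <typename T>
void Fill(void* mem, size_t numElements, T value)
{
    // View the untyped storage as the concrete element type.
    T* typed = reinterpret_cast<T*>(mem);
    for (size_t i = 0; i < numElements; ++i)
    {
        typed[i] = value;
    }
}

void Demo()
{
    void* mem = std::malloc(16 * sizeof(float)); // stands in for IOStorage::m_Mem
    Fill<float>(mem, 16, 0.5f);                  // DataType::Float32 branch
    Fill<int8_t>(mem, 16, int8_t{1});            // DataType::QAsymmS8 branch
    std::free(mem);
}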
    for (unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
    {
        armnn::OutputTensors outputTensors;
        for (const auto& output : m_IOInfo.m_OutputInfoMap)
        {
            const armnn::BindingPointInfo& bindingPointInfo = output.second;
            const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;

            m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
            outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
        }
        m_OutputTensorsVec.emplace_back(outputTensors);

        if (m_Params.m_ImportInputsIfAligned)
        {
            m_ImportedOutputIds.push_back(
                m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
        }
    }
    // If there are more iterations than provided sets, reuse the sets recursively.
    if (m_Params.m_Iterations > noInputSets)
    {
        const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
        for (unsigned int i = 1; i <= remainingInputSets; i++)
        {
            m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
            }
        }
    }

    if (m_Params.m_Iterations > noOutputSets)
    {
        const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
        for (unsigned int i = 1; i <= remainingOutputSets; i++)
        {
            m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
            if (m_Params.m_ImportInputsIfAligned)
            {
                m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
            }
        }
    }
}
ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
{
    struct IOStrategy : armnn::IStrategy
    {
        void ExecuteStrategy(const armnn::IConnectableLayer* layer,
                             const armnn::BaseDescriptor& descriptor,
                             const std::vector<armnn::ConstTensor>& constants,
                             const char* name,
                             const armnn::LayerBindingId id = 0) override
        {
            armnn::IgnoreUnused(descriptor, constants, id);
            switch (layer->GetType())
            {
                case armnn::LayerType::Input:
                    m_IOInfo.m_InputNames.emplace_back(name);
                    break;
                case armnn::LayerType::Output:
                    m_IOInfo.m_OutputNames.emplace_back(name);
                    break;
                default: {}
            }
        }
        IOInfo m_IOInfo;
    };

    IOStrategy ioStrategy;
    optNet->ExecuteStrategy(ioStrategy);
    return ioStrategy.m_IOInfo;
}
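ExecuteStrategy drives an IStrategy visitor over every layer, which is all GetIOInfo needs to collect binding names. A sketch of the same visitor run against a trivially built network, assuming INetwork exposes ExecuteStrategy as IOptimizedNetwork does here:

#include <armnn/ArmNN.hpp>
#include <armnn/IStrategy.hpp>
#include <iostream>

struct NamePrinter : armnn::IStrategy
{
    void ExecuteStrategy(const armnn::IConnectableLayer*,
                         const armnn::BaseDescriptor&,
                         const std::vector<armnn::ConstTensor>&,
                         const char* name,
                         const armnn::LayerBindingId = 0) override
    {
        std::cout << name << "\n";
    }
};

void PrintLayerNames()
{
    armnn::INetworkPtr network = armnn::INetwork::Create();
    armnn::IConnectableLayer* in  = network->AddInputLayer(0, "in");
    armnn::IConnectableLayer* out = network->AddOutputLayer(0, "out");
    in->GetOutputSlot(0).Connect(out->GetInputSlot(0));
    in->GetOutputSlot(0).SetTensorInfo(armnn::TensorInfo({1, 4}, armnn::DataType::Float32));

    NamePrinter printer;
    network->ExecuteStrategy(printer); // visits both layers, printing their names
}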
armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
{
    armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*) {}};

    armnn::OptimizerOptions options;
    options.m_Debug = m_Params.m_PrintIntermediate;

    armnn::BackendOptions gpuAcc("GpuAcc",
    {
        { "FastMathEnabled", m_Params.m_EnableFastMath },
        { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
        { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
        { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
    });

    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", m_Params.m_EnableFastMath },
        { "NumberOfThreads", m_Params.m_NumberOfThreads }
    });

    options.m_ModelOptions.push_back(gpuAcc);
    options.m_ModelOptions.push_back(cpuAcc);

    const auto optimizationStartTime = armnn::GetTimeNow();
    optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);

    ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
                    << std::fixed << armnn::GetTimeDuration(optimizationStartTime).count() << " ms\n";
    if (m_Params.m_EnableLayerDetails)
    {
        fs::path filename = m_Params.m_ModelPath;
        filename.replace_extension("dot");
        std::fstream file(filename.c_str(), std::ios_base::out);
        optNet->SerializeToDot(file);
    }

    return optNet;
}
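Optimization in isolation: a sketch using the same OptimizerOptions/BackendOptions style as above, with hard-coded values where the tool reads parameters. The option keys are backend-specific, and the CpuAcc backend choice here is an assumption:

#include <armnn/ArmNN.hpp>

armnn::IOptimizedNetworkPtr OptimizeFor(armnn::INetwork& network, armnn::IRuntime& runtime)
{
    armnn::OptimizerOptions options;
    options.m_ReduceFp32ToFp16 = false; // leave Fp32 operators untouched

    // Backend-specific tuning knobs travel inside m_ModelOptions.
    armnn::BackendOptions cpuAcc("CpuAcc",
    {
        { "FastMathEnabled", true },
        { "NumberOfThreads", 4u }
    });
    options.m_ModelOptions.push_back(cpuAcc);

    return armnn::Optimize(network, {armnn::Compute::CpuAcc},
                           runtime.GetDeviceSpec(), options);
}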
std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
{
    // Infer the model format from the file name.
    const std::string& modelFormat = m_Params.m_ModelPath;

    m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos;
    std::unique_ptr<IParser> parser = nullptr;

    if (modelFormat.find("armnn") != std::string::npos)
    {
#if defined(ARMNN_SERIALIZER)
        parser = std::make_unique<ArmNNDeserializer>();
#else
        LogAndThrow("Not built with serialization support.");
#endif
    }
    else if (modelFormat.find("tflite") != std::string::npos)
    {
#if defined(ARMNN_TF_LITE_PARSER)
        parser = std::make_unique<TfliteParser>(m_Params);
#else
        LogAndThrow("Not built with Tensorflow-Lite parser support.");
#endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
#if defined(ARMNN_ONNX_PARSER)
        parser = std::make_unique<OnnxParser>();
#else
        LogAndThrow("Not built with Onnx parser support.");
#endif
    }

    return parser;
}
void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
                                       unsigned int iteration)
{
    auto findOutputName = [&](const armnn::LayerBindingId id)
    {
        for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
        {
            if (id == it->second.first)
            {
                return it->first;
            }
        }
        return std::string{};
    };

    unsigned int outputIndex = 0;
    unsigned int numOutputs = outputTensors->size();
    for (const auto& output : *outputTensors)
    {
        const auto bindingName = findOutputName(output.first);

        size_t outputFileIndex = iteration * numOutputs + outputIndex;
        if (!m_Params.m_OutputTensorFiles.empty())
        {
            outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
            ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '" << output.first
                            << "' of iteration: " << iteration + 1 << " to file: '"
                            << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
        }

        const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
            armnn::EmptyOptional() :
            armnn::MakeOptional<std::string>(m_Params.m_OutputTensorFiles[outputFileIndex]);

        OutputWriteInfo outputWriteInfo
        {
            outputTensorFile,
            bindingName,
            output.second,
            !m_Params.m_DontPrintOutputs
        };

        std::cout << bindingName << ": ";
        std::vector<float> values;
        switch (output.second.GetDataType())
        {
            case armnn::DataType::Float32:
            {
                PrintTensor<float>(outputWriteInfo, "%f ");
                break;
            }
            case armnn::DataType::Signed32:
            {
                PrintTensor<int>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmS8:
            {
                PrintTensor<int8_t>(outputWriteInfo, "%d ");
                break;
            }
            case armnn::DataType::QAsymmU8:
            {
                PrintTensor<uint8_t>(outputWriteInfo, "%d ");
                break;
            }
            default:
            {
                LogAndThrow("Unexpected DataType");
            }
        }
        ++outputIndex;
    }
}
void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
{
    unsigned int index = 0;

    for (const auto& outputTensors : m_OutputTensorsVec)
    {
        for (const auto& outputTensor : outputTensors)
        {
            float result = 0;
            size_t size = outputTensor.second.GetNumBytes();

            switch (outputTensor.second.GetDataType())
            {
                case armnn::DataType::Float32:
                {
                    result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QSymmS16:
                {
                    result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmS8:
                {
                    result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                case armnn::DataType::QAsymmU8:
                {
                    result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
                    break;
                }
                default:
                {
                    LogAndThrow("Unexpected DataType");
                }
            }

            std::cout << "RMSE: " << result << "\n";
        }
    }
}
#if defined(ARMNN_SERIALIZER)
armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    std::ifstream file(modelPath, std::ios::binary);
    return m_Parser->CreateNetworkFromBinary(file);
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    armnnDeserializer::BindingPointInfo info = m_Parser->GetNetworkInputBindingInfo(0, inputName);
    return {info.m_BindingId, info.m_TensorInfo};
}

armnn::BindingPointInfo
ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    armnnDeserializer::BindingPointInfo info = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
    return {info.m_BindingId, info.m_TensorInfo};
}
#endif
#if defined(ARMNN_TF_LITE_PARSER)
armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;
    return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
                                                                              const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
}

armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
                                                                               const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
}
#endif
#if defined(ARMNN_ONNX_PARSER)
armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
{
    const std::string& modelPath = params.m_ModelPath;

    // Apply per-input tensor shapes, if any were given on the command line.
    std::map<std::string, armnn::TensorShape> inputShapes;
    if (!params.m_InputTensorShapes.empty())
    {
        const size_t numInputShapes   = params.m_InputTensorShapes.size();
        const size_t numInputBindings = params.m_InputNames.size();
        if (numInputShapes < numInputBindings)
        {
            throw armnn::Exception(
                fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
                            numInputBindings, numInputShapes));
        }

        for (size_t i = 0; i < numInputShapes; i++)
        {
            inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
        }

        return params.m_IsModelBinary ?
            m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
            m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
    }

    // Handle text and binary input by calling the corresponding parser function.
    return params.m_IsModelBinary ?
        m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
        m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
}
armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
{
    return m_Parser->GetNetworkInputBindingInfo(inputName);
}

armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
{
    return m_Parser->GetNetworkOutputBindingInfo(outputName);
}
#endif