// ... (ExecuteNetwork's own headers and the common Arm NN includes above this point are elided)

#if defined(ARMNN_SERIALIZER)
#include "armnnDeserializer/IDeserializer.hpp"
#endif
#if defined(ARMNN_TF_LITE_PARSER)
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#endif
#if defined(ARMNN_ONNX_PARSER)
#include "armnnOnnxParser/IOnnxParser.hpp"
#endif
#if defined(ARMNN_TFLITE_DELEGATE)
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>

#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#endif

// Given a measured inference duration and a threshold time, tell the user whether we stayed
// within the threshold.
bool CheckInferenceTimeThreshold(const std::chrono::duration<double, std::milli>& duration,
                                 const double& thresholdTime)
{
    ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                    << std::fixed << duration.count() << " ms\n";

    // A threshold of 0.0 (the default) means none was supplied on the command line.
    if (thresholdTime != 0.0)
    {
        ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                        << std::fixed << thresholdTime << " ms";
        auto thresholdMinusInference = thresholdTime - duration.count();
        ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                        << std::fixed << thresholdMinusInference << " ms" << "\n";

        if (thresholdMinusInference < 0)
        {
            std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
            ARMNN_LOG(fatal) << errorMessage;
            return false;
        }
    }
    return true;
}
#if defined(ARMNN_TFLITE_DELEGATE)
// TfLiteDelegateMainImpl: runs the network through the TfLite runtime, optionally with the
// Arm NN delegate registered. (The function signature, which takes the ExecuteNetworkParams
// as 'params', and a few set-up lines are elided in this extract.)
// ...
    std::unique_ptr<tflite::FlatBufferModel> model =
        tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());

    auto tfLiteInterpreter = std::make_unique<Interpreter>();
    tflite::ops::builtin::BuiltinOpResolver resolver;

    tflite::InterpreterBuilder builder(*model, resolver);
    builder(&tfLiteInterpreter);
    tfLiteInterpreter->AllocateTensors();

    int status = 0;
    if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate)
    {
        // Create the Arm NN delegate (assumed: its options are built from the requested backends)
        // and hand ownership to a unique_ptr with the matching custom deleter.
        armnnDelegate::DelegateOptions delegateOptions(params.m_ComputeDevices);
        std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

        // Register the Arm NN delegate with the TfLite interpreter.
        status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
        if (status == kTfLiteError)
        {
            ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
            return EXIT_FAILURE;
        }
    }
    else
    {
        std::cout << "Running on TfLite without ArmNN delegate\n";
    }
    // Populate each input tensor with data of the type given on the command line.
    // ('numInputs' and the optional input data file 'dataFile' are derived from params just
    //  above; those lines are elided in this extract.)
    for (unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
    {
        int input = tfLiteInterpreter->inputs()[inputIndex];
        TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;

        unsigned int inputSize = 1;
        // ... (an explicit --input-tensor-shape overrides the model's shape; elided)
        for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
        {
            inputSize *= inputDims->data[dim];
        }

        if (params.m_InputTypes[inputIndex].compare("float") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<float>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<float> tensorData;
            PopulateTensorWithDataGeneric<float>(tensorData,
                                                 inputSize,
                                                 dataFile,
                                                 [](const std::string& s)
                                                 { return std::stof(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
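        // --- Illustrative sketch (assumption, not part of the original file) ---------------
        // PopulateTensorWithDataGeneric<T> is assumed to read whitespace-separated values from
        // the (optional) data file and convert each one with the supplied lambda, roughly:
        //
        //     std::ifstream file("input.txt");      // hypothetical data file
        //     std::vector<float> values;
        //     std::string token;
        //     while (file >> token)
        //     {
        //         values.push_back(std::stof(token));   // same conversion as the lambda above
        //     }
        //
        // The same pattern repeats below for the remaining supported input types.
        // ------------------------------------------------------------------------------------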
        else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  inputSize,
                                                  dataFile,
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("int") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int32_t> tensorData;
            PopulateTensorWithDataGeneric<int32_t>(tensorData,
                                                   inputSize,
                                                   dataFile,
                                                   [](const std::string& s)
                                                   { return std::stoi(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
                 params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<uint8_t> tensorData;
            PopulateTensorWithDataGeneric<uint8_t>(tensorData,
                                                   inputSize,
                                                   dataFile,
                                                   [](const std::string& s)
                                                   { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: "
                                    "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  inputSize,
                                                  dataFile,
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else
        {
            ARMNN_LOG(fatal) << "Unsupported input tensor data type \""
                             << params.m_InputTypes[inputIndex] << "\". ";
            return EXIT_FAILURE;
        }
    }   // end of the per-input loop
    // Run the network for the requested number of iterations.
    for (size_t x = 0; x < params.m_Iterations; x++)
    {
        // Run the inference.
        status = tfLiteInterpreter->Invoke();
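        // --- Illustrative sketch (assumption, not part of the original file) ---------------
        // Invoke() returns a TfLiteStatus, so a caller could bail out of the loop early:
        //
        //     if (status != kTfLiteOk)
        //     {
        //         ARMNN_LOG(fatal) << "TfLite Invoke() failed on iteration " << x;
        //         return EXIT_FAILURE;
        //     }
        // ------------------------------------------------------------------------------------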
        // Print out (or write to file) each output tensor.
        for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex)
        {
            auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
            TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;

            // Write to the requested output file if one was given, otherwise to stdout.
            FILE* outputTensorFile = stdout;
            if (!params.m_OutputTensorFiles.empty())
            {
                outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w");
                if (outputTensorFile == NULL)
                {
                    ARMNN_LOG(fatal) << "Specified output tensor file, \""
                                     << params.m_OutputTensorFiles[outputIndex]
                                     << "\", cannot be created. Defaulting to stdout. "
                                     << "Error was: " << std::strerror(errno);
                    outputTensorFile = stdout;
                }
                else
                {
                    ARMNN_LOG(info) << "Writing output '" << outputIndex << "' of iteration: " << x + 1
                                    << " to file: '" << params.m_OutputTensorFiles[outputIndex] << "'";
                }
            }

            long outputSize = 1;
            for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
            {
                outputSize *= outputDims->data[dim];
            }
            if (params.m_OutputTypes[outputIndex].compare("float") == 0)
            {
                auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
                if (tfLiteDelageOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("int") == 0)
            {
                auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
                if (tfLiteDelageOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("qsymms8") == 0)
            {
                auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
                if (tfLiteDelageOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]);
                }
            }
            else if (params.m_OutputTypes[outputIndex].compare("qasymm8") == 0 ||
                     params.m_OutputTypes[outputIndex].compare("qasymmu8") == 0)
            {
                auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
                if (tfLiteDelageOutputData == NULL)
                {
                    ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                        "\"" << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                    return EXIT_FAILURE;
                }

                for (int i = 0; i < outputSize; ++i)
                {
                    fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]);
                }
            }
            else
            {
                ARMNN_LOG(fatal) << "Output tensor is null, output type: "
                                    "\"" << params.m_OutputTypes[outputIndex]
                                 << "\" may be incorrect. Output type can be specified with -z argument";
                return EXIT_FAILURE;
            }
            std::cout << std::endl;
        }   // end of the per-output loop
    }       // end of the per-iteration loop

    return status;
}
#endif // defined(ARMNN_TFLITE_DELEGATE)
template<typename TParser, typename TDataType>
int MainImpl(const ExecuteNetworkParams& params,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
    using namespace std::chrono;

    std::vector<std::vector<armnnUtils::TContainer>> inputs;
    std::vector<std::vector<armnnUtils::TContainer>> outputs;

    // ... (the InferenceModel 'model' is created from params and runtime here; 'numInputs' and
    //      'numOutputs' hold its input/output counts. Those lines are elided in this extract.)
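    // Note: armnnUtils::TContainer (armnnUtils/TContainer.hpp) is a mapbox::util::variant over
    // std::vector<float>, std::vector<int>, std::vector<unsigned char> and std::vector<int8_t>,
    // which is why results are read back below with mapbox::util::apply_visitor. Minimal sketch:
    //
    //     armnnUtils::TContainer c = std::vector<float>{ 0.1f, 0.2f };
    //     mapbox::util::apply_visitor([](const auto& v) { std::cout << v.size() << " elements\n"; }, c);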
    // Quantization parameters for the inputs (assumption: only built when --quantize-input is set).
    armnn::Optional<QuantizationParams> qParams = params.m_QuantizeInput
        ? armnn::MakeOptional<QuantizationParams>(model.GetInputQuantizationParams())
        : armnn::EmptyOptional();

    if (params.m_InputTensorDataFilePaths.size() > numInputs)
    {
        ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
                        << "for each input. The user provided " << params.m_InputTensorDataFilePaths.size()
                        << " input-tensor-data file/s which will be used to fill the input/s.\n";
    }
    // Build one set of input containers per iteration; input files are reused cyclically when
    // fewer files than iterations were supplied.
    for (unsigned int j = 0; j < params.m_Iterations; ++j)
    {
        std::vector<armnnUtils::TContainer> inputDataContainers;
        for (unsigned int i = 0; i < numInputs; ++i)
        {
            size_t inputFileIndex = j * numInputs + i;
            // ... (wrap inputFileIndex around when it exceeds the number of supplied files)
            armnn::Optional<std::string> dataFile = params.m_GenerateTensorData
                ? armnn::EmptyOptional()
                : armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths.at(inputFileIndex));

            armnnUtils::TContainer tensorData;
            PopulateTensorWithData(tensorData, model.GetInputSize(i), params.m_InputTypes[i], qParams, dataFile);
            inputDataContainers.push_back(tensorData);
        }
        inputs.push_back(inputDataContainers);
    }
    // The user may specify an --output-type that does not match the data type of the corresponding
    // model output. Warn about each mismatch, since it can cause buffer overruns or garbage results.
    for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx)
    {
        armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType();
        switch (type)
        {
            case armnn::DataType::Float32:
                if (params.m_OutputTypes[outputIdx].compare("float") != 0)
                {
                    ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The "
                                       << "corresponding --output-type is " << params.m_OutputTypes[outputIdx]
                                       << ". This may cause unexpected problems or random failures.";
                }
                break;
            case armnn::DataType::QAsymmU8:
                if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0)
                {
                    ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The "
                                       << "corresponding --output-type is " << params.m_OutputTypes[outputIdx]
                                       << ". This may cause unexpected problems or random failures.";
                }
                break;
            case armnn::DataType::Signed32:
                if (params.m_OutputTypes[outputIdx].compare("int") != 0)
                {
                    ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The "
                                       << "corresponding --output-type is " << params.m_OutputTypes[outputIdx]
                                       << ". This may cause unexpected problems or random failures.";
                }
                break;
            case armnn::DataType::QAsymmS8:
                if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0)
                {
                    ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The "
                                       << "corresponding --output-type is " << params.m_OutputTypes[outputIdx]
                                       << ". This may cause unexpected problems or random failures.";
                }
                break;
            default:
                break;
        }
    }
    // Pre-allocate one result container per output, sized and typed to match --output-type.
    // (Repeated once per iteration; the outer loop is elided in this extract.)
    std::vector<armnnUtils::TContainer> outputDataContainers;
    for (unsigned int i = 0; i < numOutputs; ++i)
    {
        if (params.m_OutputTypes[i].compare("float") == 0)
        {
            outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("int") == 0)
        {
            outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
                 params.m_OutputTypes[i].compare("qasymmu8") == 0)
        {
            outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
        {
            outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
        }
    }
    outputs.push_back(outputDataContainers);
    // Tell the user how the network is going to be executed.
    if (params.m_Iterations > 1)
    {
        std::stringstream msg;
        msg << "Network will be executed " << params.m_Iterations;
        if (params.m_Concurrent)
        {
            msg << " times in an asynchronous manner. ";
        }
        else
        {
            msg << " times successively. ";
        }
        msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
               "cover each execution.";
        ARMNN_LOG(info) << msg.str();
    }
    // Synchronous execution (the default).
    if (!params.m_Concurrent)
    {
        for (size_t x = 0; x < params.m_Iterations; x++)
        {
            // model.Run(...) returns the time elapsed during inference, in milliseconds.
            auto inference_duration = model.Run(inputs[x], outputs[x]);

            if (params.m_GenerateTensorData)
            {
                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
            }
            if (params.m_DontPrintOutputs)
            {
                ARMNN_LOG(info) << "Printing outputs to console is disabled.";
            }

            // Print (or write to file) the output tensors of this iteration.
            for (size_t i = 0; i < numOutputs; i++)
            {
                // Output files are either one per output (overwritten each iteration) or one per
                // output per iteration.
                size_t outputFileIndex = x * numOutputs + i;
                if (!params.m_OutputTensorFiles.empty())
                {
                    outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                    ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                    << inferenceModelParams.m_OutputBindings[i]   // (set up earlier; elided)
                                    << "' of iteration: " << x + 1 << " to file: '"
                                    << params.m_OutputTensorFiles[outputFileIndex] << "'";
                }
                // ... (a TensorPrinter 'printer' is constructed for this output; elided)
                mapbox::util::apply_visitor(printer, outputs[x][i]);
            }

            ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                            << std::fixed << inference_duration.count() << " ms\n";

            // A threshold of 0.0 (the default) means none was supplied on the command line.
            if (params.m_ThresholdTime != 0.0)
            {
                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                << std::fixed << params.m_ThresholdTime << " ms";
                auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                << std::fixed << thresholdMinusInference << " ms" << "\n";

                if (thresholdMinusInference < 0)
                {
                    std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
                    ARMNN_LOG(fatal) << errorMessage;
                }
            }
        }
    }
    // Asynchronous execution using the Arm NN thread pool (assumed condition: a thread pool
    // size was requested on the command line).
    else if (params.m_ThreadPoolSize >= 1)
    {
        ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
        // ... (an asynchronous callback manager 'callbackManager' is set up here; elided)

        // Map each inference, by its InferenceId, to the output buffers it writes into.
        std::unordered_map<armnn::InferenceId, std::vector<armnnUtils::TContainer>&> inferenceOutputMap;

        // Earliest start / latest end across all inferences, used for the overall time below.
        std::chrono::high_resolution_clock::time_point earliestStartTime;
        std::chrono::high_resolution_clock::time_point latestEndTime =
            std::chrono::high_resolution_clock::now();

        // Schedule every inference on the thread pool.
        for (size_t i = 0; i < params.m_Iterations; ++i)
        {
            std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
            inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
            model.RunAsync(inputs[i], outputs[i], cb);
        }

        // Collect the results as each inference completes.
        for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
        {
            auto cb = callbackManager.GetNotifiedCallback();

            auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
            auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
            auto inferenceDuration = endTime - startTime;

            if (latestEndTime < cb->GetEndTime())
            {
                latestEndTime = cb->GetEndTime();
            }

            if (earliestStartTime.time_since_epoch().count() == 0)
            {
                earliestStartTime = cb->GetStartTime();
            }
            else if (earliestStartTime > cb->GetStartTime())
            {
                earliestStartTime = cb->GetStartTime();
            }

            if (params.m_GenerateTensorData)
            {
                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
            }
            if (params.m_DontPrintOutputs)
            {
                ARMNN_LOG(info) << "Printing outputs to console is disabled.";
            }

            // Print (or write to file) the output tensors of this inference.
            for (size_t i = 0; i < numOutputs; i++)
            {
                size_t outputFileIndex = iteration * numOutputs + i;
                if (!params.m_OutputTensorFiles.empty())
                {
                    outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                    ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                    << inferenceModelParams.m_OutputBindings[i]   // (set up earlier; elided)
                                    << "' of iteration: " << iteration + 1 << " to file: '"
                                    << params.m_OutputTensorFiles[outputFileIndex] << "'";
                }
                // ... (a TensorPrinter 'printer' is constructed for this output; elided)
                mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
            }
            // ... (inferenceDuration is checked against the threshold here; elided)
        }

        // Overall time is measured from the earliest start to the latest end across the pool.
        auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
        auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
        auto totalInferenceDuration = overallEndTime - overallStartTime;
        ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                        << std::fixed << totalInferenceDuration.count() << " ms\n";
    }
    // Asynchronous execution, one std::launch::async task per inference.
    else
    {
        ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
        std::vector<std::future<std::tuple<unsigned int,
            std::chrono::duration<double, std::milli>>>> inferenceResults;

        // Create one working memory handle per inference.
        std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;
        for (unsigned int i = 0; i < params.m_Iterations; ++i)
        {
            workingMemHandles.push_back(model.CreateWorkingMemHandle());
        }

        // Launch every inference in its own thread and keep the futures.
        const auto start_time = armnn::GetTimeNow();
        for (unsigned int i = 0; i < params.m_Iterations; ++i)
        {
            armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get();
            inferenceResults.push_back(std::async(
                std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
                    return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
                }
            ));
        }

        // Check the results; future::get() blocks until the corresponding inference has finished.
        for (unsigned int j = 0; j < inferenceResults.size(); ++j)
        {
            auto inferenceResult = inferenceResults[j].get();
            auto inferenceDuration = std::get<1>(inferenceResult);
            auto inferenceID = std::get<0>(inferenceResult);

            if (params.m_GenerateTensorData)
            {
                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
            }
            if (params.m_DontPrintOutputs)
            {
                ARMNN_LOG(info) << "Printing outputs to console is disabled.";
            }

            // Print (or write to file) the output tensors of this inference.
            for (size_t i = 0; i < numOutputs; i++)
            {
                size_t outputFileIndex = j * numOutputs + i;
                if (!params.m_OutputTensorFiles.empty())
                {
                    outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size();
                    ARMNN_LOG(info) << "Writing output " << i << " named: '"
                                    << inferenceModelParams.m_OutputBindings[i]   // (set up earlier; elided)
                                    << "' of iteration: " << j + 1 << " to file: '"
                                    << params.m_OutputTensorFiles[outputFileIndex] << "'";
                }
                // ... (a TensorPrinter 'printer' is constructed for this output; elided)
                mapbox::util::apply_visitor(printer, outputs[j][i]);
            }
            // ... (inferenceDuration is checked against the threshold here; elided)

            ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
        }

        auto duration = armnn::GetTimeDuration(start_time);
        ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                        << std::fixed << duration.count() << " ms\n";
    }

    // ... (armnn::Exception handling and the remainder of MainImpl are elided in this extract)
    return EXIT_SUCCESS;
}
int main(int argc, const char* argv[])
{
    // ... (logging is configured via armnn::ConfigureLogging; elided in this extract)

    // Parse ExecuteNetwork parameters and runtime options from the command line.
    ProgramOptions ProgramOptions;
    try {
        ProgramOptions.ParseOptions(argc, argv);
    } catch (const std::exception& e) {
        ARMNN_LOG(fatal) << e.what();
        return EXIT_FAILURE;
    }

    // Writing layer details to stdout requires profiling to be enabled.
    if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut ||
         ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut) &&
        !ProgramOptions.m_ExNetParams.m_EnableProfiling)
    {
        ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
        return EXIT_FAILURE;
    }

    // Create a runtime and forward to the implementation that matches the model format.
    std::shared_ptr<armnn::IRuntime> runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions));
    std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat;

    if (modelFormat.find("armnn") != std::string::npos)
    {
    #if defined(ARMNN_SERIALIZER)
        return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with serialization support.";
        return EXIT_FAILURE;
    #endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
    #if defined(ARMNN_ONNX_PARSER)
        return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
        return EXIT_FAILURE;
    #endif
    }
    else if (modelFormat.find("tflite") != std::string::npos)
    {
        if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser)
        {
        #if defined(ARMNN_TF_LITE_PARSER)
            return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
        #else
            ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
            return EXIT_FAILURE;
        #endif
        }
        // Otherwise run through the TfLite runtime / Arm NN delegate path defined earlier.
    #if defined(ARMNN_TF_LITE_DELEGATE)
        return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
        return EXIT_FAILURE;
    #endif
    }
    else
    {
        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
                         << "'. Please include 'tflite' or 'onnx'";
        return EXIT_FAILURE;
    }
}
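// --- Illustrative invocation (assumption; the authoritative option list is defined in -------
// --- ExecuteNetworkProgramOptions.cpp) -------------------------------------------------------
// Run a TfLite model on the CpuAcc backend, feeding one float input file and printing the
// float output (the -z output type is the one referenced in the log message above):
//
//     ./ExecuteNetwork -m model.tflite -f tflite-binary -c CpuAcc \
//                      -i input_tensor -d input.txt -y float \
//                      -o output_tensor -z float
// ----------------------------------------------------------------------------------------------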