#if defined(ARMNN_SERIALIZER)
// ... (armnnDeserializer::IDeserializer header)
#endif
#if defined(ARMNN_TF_LITE_PARSER)
// ... (armnnTfLiteParser::ITfLiteParser header)
#endif
#if defined(ARMNN_ONNX_PARSER)
// ... (armnnOnnxParser::IOnnxParser header)
#endif
#if defined(ARMNN_TFLITE_DELEGATE)
// ... (Arm NN delegate headers)
#include <tensorflow/lite/builtin_ops.h>
#include <tensorflow/lite/c/builtin_op_data.h>
#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/optional_debug_tools.h>
#include <tensorflow/lite/kernels/builtin_op_kernels.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#endif

#if defined(ARMNN_TFLITE_DELEGATE)
int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params,
                           const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
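    // Overview of this path: load the .tflite model from params.m_ModelPath, build a TfLite
    // interpreter, optionally register the Arm NN delegate, fill every input tensor according to
    // its declared type, call Invoke(), and print each output tensor.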
    std::unique_ptr<tflite::FlatBufferModel> model =
        tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str());

    auto tfLiteInterpreter = std::make_unique<tflite::Interpreter>();
    tflite::ops::builtin::BuiltinOpResolver resolver;

    tflite::InterpreterBuilder builder(*model, resolver);
    builder(&tfLiteInterpreter);
    tfLiteInterpreter->AllocateTensors();
    // Create the Arm NN delegate and register it with the interpreter (the declaration of
    // `status` and the construction of `delegateOptions` are elided in this listing).
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
        theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                         armnnDelegate::TfLiteArmnnDelegateDelete);
    status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
    if (status == kTfLiteError)
    {
        ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!";
        return EXIT_FAILURE;
    }
    // ... (when the plain TfLite interpreter is requested instead)
    std::cout << "Running on TfLite without ArmNN delegate\n";
    std::vector<std::string> inputBindings;
    for (const std::string& inputName : params.m_InputNames)
    {
        inputBindings.push_back(inputName);
    }
    // ...
    const size_t numInputs = inputBindings.size();
    for (unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex)
    {
        int input = tfLiteInterpreter->inputs()[inputIndex];
        TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims;

        // Total number of elements in this input tensor.
        long inputSize = 1;
        for (unsigned int dim = 0; dim < static_cast<unsigned int>(inputDims->size); ++dim)
        {
            inputSize *= inputDims->data[dim];
        }
        if (params.m_InputTypes[inputIndex].compare("float") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<float>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: \""
                                 << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<float> tensorData;
            PopulateTensorWithDataGeneric<float>(tensorData,
                                                 /* ... element count and data file arguments elided ... */
                                                 [](const std::string& s) { return std::stof(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: \""
                                 << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  /* ... element count and data file arguments elided ... */
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("int") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int32_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: \""
                                 << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int32_t> tensorData;
            PopulateTensorWithDataGeneric<int32_t>(tensorData,
                                                   /* ... element count and data file arguments elided ... */
                                                   [](const std::string& s) { return std::stoi(s); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 ||
                 params.m_InputTypes[inputIndex].compare("qasymmu8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<uint8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: \""
                                 << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<uint8_t> tensorData;
            PopulateTensorWithDataGeneric<uint8_t>(tensorData,
                                                   /* ... element count and data file arguments elided ... */
                                                   [](const std::string& s)
                                                   { return armnn::numeric_cast<uint8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else if (params.m_InputTypes[inputIndex].compare("qasymms8") == 0)
        {
            auto inputData = tfLiteInterpreter->typed_tensor<int8_t>(input);

            if (inputData == NULL)
            {
                ARMNN_LOG(fatal) << "Input tensor is null, input type: \""
                                 << params.m_InputTypes[inputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            std::vector<int8_t> tensorData;
            PopulateTensorWithDataGeneric<int8_t>(tensorData,
                                                  /* ... element count and data file arguments elided ... */
                                                  [](const std::string& s)
                                                  { return armnn::numeric_cast<int8_t>(std::stoi(s)); });

            std::copy(tensorData.begin(), tensorData.end(), inputData);
        }
        else
        {
            ARMNN_LOG(fatal) << "Unsupported input tensor data type \""
                             << params.m_InputTypes[inputIndex] << "\". ";
            return EXIT_FAILURE;
        }
    }
    // Run the inference.
    status = tfLiteInterpreter->Invoke();
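    // When the Arm NN delegate was registered above, the delegated partitions of the graph are
    // executed by Arm NN during Invoke(); any remaining ops run on the built-in TfLite kernels.
    // (Handling of the returned status is elided in this listing.)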
    // Print each output tensor in the data type requested by the user.
    for (unsigned int outputIndex = 0; outputIndex < params.m_OutputNames.size(); ++outputIndex)
    {
        auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex];
        TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims;

        long outputSize = 1;
        for (unsigned int dim = 0; dim < static_cast<unsigned int>(outputDims->size); ++dim)
        {
            outputSize *= outputDims->data[dim];
        }
        if (params.m_OutputTypes[outputIndex].compare("float") == 0)
        {
            auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<float>(tfLiteDelegateOutputId);
            if (tfLiteDelegateOutputData == NULL)
            {
                ARMNN_LOG(fatal) << "Output tensor is null, output type: \""
                                 << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            for (int i = 0; i < outputSize; ++i)
            {
                printf("%f ", tfLiteDelegateOutputData[i]);
            }
        }
        else if (params.m_OutputTypes[outputIndex].compare("int") == 0)
        {
            auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<int32_t>(tfLiteDelegateOutputId);
            if (tfLiteDelegateOutputData == NULL)
            {
                ARMNN_LOG(fatal) << "Output tensor is null, output type: \""
                                 << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            for (int i = 0; i < outputSize; ++i)
            {
                printf("%d ", tfLiteDelegateOutputData[i]);
            }
        }
        else if (params.m_OutputTypes[outputIndex].compare("qsymms8") == 0)
        {
            auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<int8_t>(tfLiteDelegateOutputId);
            if (tfLiteDelegateOutputData == NULL)
            {
                ARMNN_LOG(fatal) << "Output tensor is null, output type: \""
                                 << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            for (int i = 0; i < outputSize; ++i)
            {
                printf("%d ", tfLiteDelegateOutputData[i]);
            }
        }
        else if (params.m_OutputTypes[outputIndex].compare("qasymm8") == 0 ||
                 params.m_OutputTypes[outputIndex].compare("qasymmu8") == 0)
        {
            auto tfLiteDelegateOutputData = tfLiteInterpreter->typed_tensor<uint8_t>(tfLiteDelegateOutputId);
            if (tfLiteDelegateOutputData == NULL)
            {
                ARMNN_LOG(fatal) << "Output tensor is null, output type: \""
                                 << params.m_OutputTypes[outputIndex] << "\" may be incorrect.";
                return EXIT_FAILURE;
            }

            for (int i = 0; i < outputSize; ++i)
            {
                printf("%u ", tfLiteDelegateOutputData[i]);
            }
        }
        else
        {
            ARMNN_LOG(fatal) << "Output tensor is null, output type: \"" << params.m_OutputTypes[outputIndex]
                             << "\" may be incorrect. Output type can be specified with -z argument";
            return EXIT_FAILURE;
        }
        std::cout << std::endl;
    }
    return status;
}
#endif
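// MainImpl is the parser-based path: TParser is the deserializer, ONNX parser or TfLite parser
// picked by main() below, and the network is executed through the Arm NN runtime via the `model`
// object whose construction is elided in this listing.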
template<typename TParser, typename TDataType>
int MainImpl(const ExecuteNetworkParams& params,
             const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
{
    // ...
    std::vector<std::vector<TContainer>> inputs;
    std::vector<std::vector<TContainer>> outputs;
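    // TContainer is a variant over std::vector<float>, std::vector<int>, std::vector<unsigned char>
    // and std::vector<int8_t>; `inputs` and `outputs` hold one set of containers per iteration.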
    // ... (model construction and per-input quantization handling elided, e.g.
    //      armnn::MakeOptional<QuantizationParams>(model.GetInputQuantizationParams()))

    ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required "
                    << "for each input. The user provided " << params.m_InputTensorDataFilePaths.size()
                    << " input-tensor-data file/s which will be used to fill the input/s.\n";
    // Fill one set of input containers per requested iteration (the outer loop over `j` is elided).
    std::vector<TContainer> inputDataContainers;
    for (unsigned int i = 0; i < numInputs; ++i)
    {
        // Index of the data file that feeds this input on this iteration.
        size_t inputFileIndex = j * numInputs + i;
        // ...
        //     armnn::MakeOptional<std::string>(params.m_InputTensorDataFilePaths.at(inputFileIndex))
        // ...
        inputDataContainers.push_back(tensorData);
    }
    inputs.push_back(inputDataContainers);
    // Pre-allocate output storage; the element type follows params.m_OutputTypes[i].
    std::vector<TContainer> outputDataContainers;
    for (unsigned int i = 0; i < numOutputs; ++i)
    {
        if (params.m_OutputTypes[i].compare("float") == 0)
        {
            outputDataContainers.push_back(std::vector<float>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("int") == 0)
        {
            outputDataContainers.push_back(std::vector<int>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("qasymm8") == 0 ||
                 params.m_OutputTypes[i].compare("qasymmu8") == 0)
        {
            outputDataContainers.push_back(std::vector<uint8_t>(model.GetOutputSize(i)));
        }
        else if (params.m_OutputTypes[i].compare("qasymms8") == 0)
        {
            outputDataContainers.push_back(std::vector<int8_t>(model.GetOutputSize(i)));
        }
    }
    outputs.push_back(outputDataContainers);
    // Describe the execution plan when more than one iteration was requested.
    std::stringstream msg;
    msg << "Network will be executed " << params.m_Iterations;
    if (/* asynchronous execution requested (condition elided) */)
    {
        msg << " times in an asynchronous manner. ";
    }
    else
    {
        msg << " times successively. ";
    }
    msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
           "cover each execution.";
            // Synchronous execution: loop over the requested iterations (loop header elided; x is
            // the iteration index). model.Run(...) returns the elapsed inference time in milliseconds.
            auto inference_duration = model.Run(inputs[x], outputs[x]);

            if (params.m_GenerateTensorData)
            {
                ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
            }
            // Print or write out each output of this iteration.
            for (size_t i = 0; i < numOutputs; i++)
            {
                // ...
                size_t outputFileIndex = x * numOutputs + i;
                // ...
                ARMNN_LOG(info) << "Writing output " << i << " named: '" /* ... binding name ... */
                                << "' of iteration: " << x + 1 << " to file: '" /* ... output file ... */;
                // ...
                mapbox::util::apply_visitor(printer, outputs[x][i]);
            }
            ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                            << std::fixed << inference_duration.count() << " ms\n";
            // Optional threshold check: fail when the inference exceeds the user-provided threshold.
            ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                            << std::fixed << params.m_ThresholdTime << " ms";
            auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count();
            ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                            << std::fixed << thresholdMinusInference << " ms" << "\n";

            if (thresholdMinusInference < 0)
            {
                std::string errorMessage = "Elapsed inference time is greater than provided threshold time.";
                ARMNN_LOG(fatal) << errorMessage;
            }
            // Asynchronous execution using the Arm NN thread pool (surrounding branch and the
            // callback manager declaration are elided in this listing).
            ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... \n";
            // Map each in-flight inference (by its InferenceId) to the containers it writes to.
            std::unordered_map<armnn::InferenceId, std::vector<TContainer>&> inferenceOutputMap;

            // Earliest start and latest end across all inferences, used for the overall time.
            std::chrono::high_resolution_clock::time_point earliestStartTime;
            std::chrono::high_resolution_clock::time_point latestEndTime =
                std::chrono::high_resolution_clock::now();

            // Schedule one inference per iteration (loop header elided).
            std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
            inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]});
            model.RunAsync(inputs[i], outputs[i], cb);
            // Collect the results as each scheduled inference completes.
            for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration)
            {
                auto cb = callbackManager.GetNotifiedCallback();
                auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
                auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
                auto inferenceDuration = endTime - startTime;

                if (latestEndTime < cb->GetEndTime())
                {
                    latestEndTime = cb->GetEndTime();
                }

                if (earliestStartTime.time_since_epoch().count() == 0)
                {
                    earliestStartTime = cb->GetStartTime();
                }
                else if (earliestStartTime > cb->GetStartTime())
                {
                    earliestStartTime = cb->GetStartTime();
                }
                if (params.m_GenerateTensorData)
                {
                    ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
                }

                // Print or write out each output of this inference.
                for (size_t i = 0; i < numOutputs; i++)
                {
                    // ...
                    size_t outputFileIndex = iteration * numOutputs + i;
                    // ...
                    ARMNN_LOG(info) << "Writing output " << i << " named: '" /* ... binding name ... */
                                    << "' of iteration: " << iteration + 1 << " to file: '" /* ... output file ... */;
                    // ...
                    mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]);
                }
                ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                                << std::fixed << inferenceDuration.count() << " ms\n";
                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                << std::fixed << params.m_ThresholdTime << " ms";
                auto thresholdMinusInference =
                    params.m_ThresholdTime - duration<double, std::milli>(inferenceDuration).count();
                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                << std::fixed << thresholdMinusInference << " ms" << "\n";

                if (thresholdMinusInference < 0)
                {
                    ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
                }
            }
            auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
            auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
            auto totalInferenceDuration = overallEndTime - overallStartTime;
            ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                            << std::fixed << totalInferenceDuration.count() << " ms\n";
            // Asynchronous execution using std::launch::async (surrounding branch elided).
            ARMNN_LOG(info) << "Asynchronous Execution with std::launch::async... \n";
            // One future per scheduled inference; each yields the inference id and its duration.
            std::vector<std::future<std::tuple<unsigned int,
                std::chrono::duration<double, std::milli>>>> inferenceResults;
            // One working memory handle per concurrent inference.
            std::vector<std::unique_ptr<armnn::experimental::IWorkingMemHandle>> workingMemHandles;

            // ... (working memory handle creation and the scheduling loop over `i` are elided)
            inferenceResults.push_back(std::async(
                std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() {
                    return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i);
                }));
            // Check the results as the futures complete.
            for (unsigned int j = 0; j < inferenceResults.size(); ++j)
            {
                // Get the result for this inference.
                auto inferenceResult = inferenceResults[j].get();
                auto inferenceDuration = std::get<1>(inferenceResult);
                auto inferenceID = std::get<0>(inferenceResult);

                if (params.m_GenerateTensorData)
                {
                    ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
                }

                // Print or write out each output of this inference.
                for (size_t i = 0; i < numOutputs; i++)
                {
                    // ...
                    size_t outputFileIndex = j * numOutputs + i;
                    // ...
                    ARMNN_LOG(info) << "Writing output " << i << " named: '" /* ... binding name ... */
                                    << "' of iteration: " << j + 1 << " to file: '" /* ... output file ... */;
                    // ...
                    mapbox::util::apply_visitor(printer, outputs[j][i]);
                }
                ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2)
                                << std::fixed << inferenceDuration.count() << " ms\n";
                ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2)
                                << std::fixed << params.m_ThresholdTime << " ms";
                auto thresholdMinusInference = params.m_ThresholdTime - inferenceDuration.count();
                ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2)
                                << std::fixed << thresholdMinusInference << " ms" << "\n";

                if (thresholdMinusInference < 0)
                {
                    ARMNN_LOG(fatal) << "Elapsed inference time is greater than provided threshold time. \n";
                }

                ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n";
            }
            ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2)
                            << std::fixed << duration.count() << " ms\n";
int main(int argc, const char* argv[])
{
    // Configure logging, then parse the ExecuteNetwork parameters and runtime options from the
    // command line (parsing throws on invalid input).
    try
    {
        // ...
    }
    catch (const std::exception& e)
    {
        ARMNN_LOG(fatal) << e.what();
        return EXIT_FAILURE;
    }

    // Outputting layer details requires profiling to be enabled (guard condition elided).
    ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details";
    // ... (runtime creation and modelFormat extraction from ProgramOptions.m_ExNetParams elided)
    if (modelFormat.find("armnn") != std::string::npos)
    {
    #if defined(ARMNN_SERIALIZER)
        return MainImpl<armnnDeserializer::IDeserializer, float>(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with serialization support.";
        return EXIT_FAILURE;
    #endif
    }
    else if (modelFormat.find("onnx") != std::string::npos)
    {
    #if defined(ARMNN_ONNX_PARSER)
        return MainImpl<armnnOnnxParser::IOnnxParser, float>(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with Onnx parser support.";
        return EXIT_FAILURE;
    #endif
    }
    else if (modelFormat.find("tflite") != std::string::npos)
    {
        // When the Arm NN TfLite parser executor is selected (condition elided):
    #if defined(ARMNN_TF_LITE_PARSER)
        return MainImpl<armnnTfLiteParser::ITfLiteParser, float>(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support.";
        return EXIT_FAILURE;
    #endif
        // When the Arm NN TfLite delegate or the plain TfLite interpreter is selected (condition elided):
    #if defined(ARMNN_TF_LITE_DELEGATE)
        return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, runtime);
    #else
        ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support.";
        return EXIT_FAILURE;
    #endif
    }
    else
    {
        ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat
                         << "'. Please include 'tflite' or 'onnx'";
        return EXIT_FAILURE;
    }
}