From f4ccb1f6339a1e9ed573f188e7f14353167b5749 Mon Sep 17 00:00:00 2001
From: Nikhil Raj Arm
Date: Tue, 5 Jul 2022 09:29:18 +0000
Subject: Revert "IVGCVSW-6650 Refactor ExecuteNetwork"

This reverts commit 615e06f54a4c4139e81e289991ba4084aa2f69d3.

Reason for revert:

Change-Id: I06a4a0119463188a653bb749033f78514645bd0c
---
 tests/CMakeLists.txt                          |   10 -
 tests/ExecuteNetwork/ArmNNExecutor.cpp        |  767 --------------
 tests/ExecuteNetwork/ArmNNExecutor.hpp        |  161 ---
 tests/ExecuteNetwork/ExecuteNetwork.cpp       | 1074 +++++++++++++++++++-
 tests/ExecuteNetwork/ExecuteNetworkParams.cpp |  134 ++-
 tests/ExecuteNetwork/ExecuteNetworkParams.hpp |   89 +-
 .../ExecuteNetworkProgramOptions.cpp          |  126 +--
 tests/ExecuteNetwork/IExecutor.hpp            |   22 -
 tests/ExecuteNetwork/TfliteExecutor.cpp       |  251 -----
 tests/ExecuteNetwork/TfliteExecutor.hpp       |   35 -
 tests/InferenceModel.hpp                      |   37 +-
 .../NetworkExecutionUtils.cpp                 |  309 +++++-
 .../NetworkExecutionUtils.hpp                 |  279 +----
 13 files changed, 1616 insertions(+), 1678 deletions(-)
 delete mode 100644 tests/ExecuteNetwork/ArmNNExecutor.cpp
 delete mode 100644 tests/ExecuteNetwork/ArmNNExecutor.hpp
 delete mode 100644 tests/ExecuteNetwork/IExecutor.hpp
 delete mode 100644 tests/ExecuteNetwork/TfliteExecutor.cpp
 delete mode 100644 tests/ExecuteNetwork/TfliteExecutor.hpp

diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 87a5b46024..4cb324f2c7 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -139,9 +139,6 @@ if (BUILD_ARMNN_SERIALIZER
     OR BUILD_ONNX_PARSER
     OR BUILD_ARMNN_TFLITE_DELEGATE)
     set(ExecuteNetwork_sources
-        ExecuteNetwork/IExecutor.hpp
-        ExecuteNetwork/ArmNNExecutor.cpp
-        ExecuteNetwork/ArmNNExecutor.hpp
         ExecuteNetwork/ExecuteNetwork.cpp
         ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
         ExecuteNetwork/ExecuteNetworkProgramOptions.hpp
@@ -150,13 +147,6 @@ if (BUILD_ARMNN_SERIALIZER
         NetworkExecutionUtils/NetworkExecutionUtils.cpp
         NetworkExecutionUtils/NetworkExecutionUtils.hpp)
 
-    if(BUILD_ARMNN_TFLITE_DELEGATE)
-        set(ExecuteNetwork_sources
-            ${ExecuteNetwork_sources}
-            ExecuteNetwork/TfliteExecutor.cpp
-            ExecuteNetwork/TfliteExecutor.hpp)
-    endif()
-
     add_executable_ex(ExecuteNetwork ${ExecuteNetwork_sources})
     target_include_directories(ExecuteNetwork PRIVATE ../src/armnn)
     target_include_directories(ExecuteNetwork PRIVATE ../src/armnnUtils)
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
deleted file mode 100644
index 626155e28c..0000000000
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ /dev/null
@@ -1,767 +0,0 @@
-//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
-// SPDX-License-Identifier: MIT -// - - -#include "ArmNNExecutor.hpp" -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" - -#include -#include - - -using namespace armnn; -using namespace std::chrono; - -ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions) -: m_Params(params) -{ - runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling; - runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath; - m_Runtime = armnn::IRuntime::Create(runtimeOptions); - - auto parser = CreateParser(); - auto network = parser->CreateNetwork(m_Params); - auto optNet = OptimizeNetwork(network.get()); - - m_IOInfo = GetIOInfo(network.get()); - SetupInputsAndOutputs(); - - std::string errorMsg; - INetworkProperties networkProperties{m_Params.m_Concurrent, MemorySource::Undefined, MemorySource::Undefined}; - m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties); - - if (m_Params.m_Iterations > 1) - { - std::stringstream msg; - msg << "Network will be executed " << m_Params.m_Iterations; - if (m_Params.m_Concurrent) - { - msg << " times in an asynchronous manner. "; - } - else - { - msg << " times successively. "; - } - msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " - "cover each execution."; - ARMNN_LOG(info) << msg.str(); - } - - if (m_Params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - - if (m_Params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } -} - -void ArmNNExecutor::ExecuteAsync() -{ - std::vector> memHandles; - std::unique_ptr threadpool; - armnn::AsyncCallbackManager callbackManager; - std::unordered_map inferenceOutputMap; - - for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i) - { - memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId)); - } - - threadpool = std::make_unique(m_Params.m_ThreadPoolSize, - m_Runtime.get(), - memHandles); - - ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... 
\n"; - // Declare the latest and earliest inference times here to be used when calculating overall time - std::chrono::high_resolution_clock::time_point earliestStartTime = - std::chrono::high_resolution_clock::time_point::max(); - std::chrono::high_resolution_clock::time_point latestEndTime = - std::chrono::high_resolution_clock::now(); - - // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the - // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < m_Params.m_Iterations; ++i) - { - std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); - - std::shared_ptr cb = callbackManager.GetNewCallback(); - inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]}); - threadpool->Schedule(m_NetworkId, - m_InputTensorsVec[i], - m_OutputTensorsVec[i], - armnn::QosExecPriority::Medium, - cb); - } - - // Check the results - for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration) - { - auto cb = callbackManager.GetNotifiedCallback(); - - // Get the results - if (earliestStartTime > cb->GetStartTime()) - { - earliestStartTime = cb->GetStartTime(); - } - if (latestEndTime < cb->GetEndTime()) - { - latestEndTime = cb->GetEndTime(); - } - - auto startTime = time_point_cast(cb->GetStartTime()); - auto endTime = time_point_cast(cb->GetEndTime()); - auto inferenceDuration = endTime - startTime; - CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); - if(!m_Params.m_DontPrintOutputs) - { - const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()]; - PrintOutputTensors(out, iteration); - } - } - //print duration difference between overallStartTime and overallEndTime - auto overallEndTime = time_point_cast(latestEndTime); - auto overallStartTime = time_point_cast(earliestStartTime); - auto totalInferenceDuration = overallEndTime - overallStartTime; - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << totalInferenceDuration.count() << " ms\n"; - -} - -void ArmNNExecutor::ExecuteSync() -{ - for (size_t x = 0; x < m_Params.m_Iterations; x++) - { - std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); - - const auto start_time = armnn::GetTimeNow(); - armnn::Status ret; - if (m_Params.m_ImportInputsIfAligned) - { - ret = m_Runtime->EnqueueWorkload(m_NetworkId, - m_InputTensorsVec[x], - m_OutputTensorsVec[x], - m_ImportedInputIds[x], - m_ImportedOutputIds[x]); - } - else - { - ret = m_Runtime->EnqueueWorkload(m_NetworkId, - m_InputTensorsVec[x], - m_OutputTensorsVec[x]); - } - - const auto inferenceDuration = armnn::GetTimeDuration(start_time); - - // if profiling is enabled print out the results - if(profiler && profiler->IsProfilingEnabled()) - { - profiler->Print(std::cout); - } - - if(ret == armnn::Status::Failure) - { - throw armnn::Exception("IRuntime::EnqueueWorkload failed"); - } - - if(!m_Params.m_DontPrintOutputs) - { - PrintOutputTensors(&m_OutputTensorsVec[x], x); - } - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); - } -} - -std::vector ArmNNExecutor::Execute() -{ - if(m_Params.m_ThreadPoolSize == 0) - { - ExecuteSync(); - } - else - { - ExecuteAsync(); - } - std::vector results; - for (auto& output : m_OutputStorage) - { - results.push_back(output.m_Mem); - } - - return results; -} - -void ArmNNExecutor::PrintNetworkInfo() -{ - const std::vector& inputNames = 
m_Params.m_InputNames.size() != 0 ? - m_Params.m_InputNames : - m_IOInfo.m_InputNames; - std::stringstream ss; - ss << "===== Network Info =====\n"; - ss << "Inputs in order:\n" ; - for (const auto& inputName : inputNames) - { - const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second; - ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType()) << "\n"; - } - - ss << "Outputs in order:\n" ; - for (const auto& outputName : m_IOInfo.m_OutputNames) - { - const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second; - ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType()) << "\n"; - if (outputInfo.IsQuantized()) - { - ss << "Quantization Offset: " << outputInfo.GetQuantizationOffset(); - if (outputInfo.HasMultipleQuantizationScales()) - { - ss << "Quantization scales: "; - for (const auto scale: outputInfo.GetQuantizationScales()) - { - ss << scale << ", "; - } - ss << "\n"; - } - else - { - ss << "Quantization scale: " << outputInfo.GetQuantizationScale(); - } - } - } - - std::cout << ss.str() << std::endl; -} - -void ArmNNExecutor::SetupInputsAndOutputs() -{ - const unsigned int noOfInputs = m_IOInfo.m_InputNames.size(); - - if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs) - { - LogAndThrow("Number of input names does not match number of inputs\n"); - } - - const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size(); - const std::vector& inputNames = m_Params.m_InputNames.size() != 0 ? - m_Params.m_InputNames : - m_IOInfo.m_InputNames; - unsigned int noInputSets; - - if (inputFilePaths != 0) - { - if (inputFilePaths % noOfInputs != 0) - { - LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) + - " not compatible with number of inputs: " + std::to_string(noOfInputs)); - } - noInputSets = inputFilePaths / noOfInputs; - if (noInputSets != 1 && m_Params.m_ReuseBuffers) - { - LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers"); - } - } - else - { - noInputSets = 1; - } - - const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size(); - const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size(); - unsigned int noOutputSets; - - if (outputFilePaths != 0) - { - if (outputFilePaths % noOfOutputs != 0) - { - LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) + - ", not compatible with number of outputs: " + std::to_string(noOfOutputs)); - } - noOutputSets = outputFilePaths / noOfOutputs; - - if (noOutputSets != 1 && m_Params.m_ReuseBuffers) - { - LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers"); - } - } - else - { - noOutputSets = 1; - } - - if (m_Params.m_ThreadPoolSize != 0) - { - // The current implementation of the Threadpool does not allow binding of outputs to a thread - // So to ensure no two threads write to the same output at the same time, no output can be reused - noOutputSets = m_Params.m_Iterations; - } - - if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs) - { - ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required " - << "for each input. 
The user provided " - << m_Params.m_InputTensorDataFilePaths.size() - << " input-tensor-data file/s which will be used to fill the input/s.\n"; - } - - unsigned int inputCount = 0; - for(unsigned int inputSet = 0; inputSet < noInputSets ; inputSet++) - { - armnn::InputTensors inputTensors; - for (const auto& inputName: inputNames) - { - armnn::BindingPointInfo bindingPointInfo; - try - { - bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName); - } - catch (const std::out_of_range &e) - { - LogAndThrow("Input with inputName: " + inputName + " not found."); - } - - const armnn::TensorInfo &tensorInfo = bindingPointInfo.second; - auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(), - tensorInfo.GetQuantizationScale(), - tensorInfo.GetQuantizationOffset(), - true}; - - m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()}); - - const int bindingId = bindingPointInfo.first; - inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem}); - - const armnn::Optional dataFile = m_Params.m_GenerateTensorData ? - armnn::EmptyOptional() : - armnn::MakeOptional( - m_Params.m_InputTensorDataFilePaths.at(inputCount++)); - - switch (tensorInfo.GetDataType()) - { - case armnn::DataType::Float32: - { - auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); - PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); - break; - } - case armnn::DataType::QSymmS16: - { - auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); - PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); - break; - } - case armnn::DataType::QSymmS8: - { - auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); - PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); - break; - } - case armnn::DataType::QAsymmU8: - case armnn::DataType::QAsymmS8: - { - auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); - PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); - break; - } - default: - { - } - } - m_InputTensorsVec.push_back(inputTensors); - - if (m_Params.m_ImportInputsIfAligned) - { - m_ImportedInputIds.push_back( - m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc)); - } - } - } - - for(unsigned int outputSet = 0; outputSet < noOutputSets; outputSet++) - { - armnn::OutputTensors outputTensors; - for (const auto &output: m_IOInfo.m_OutputInfoMap) - { - const armnn::BindingPointInfo &bindingPointInfo = output.second; - const armnn::TensorInfo &tensorInfo = bindingPointInfo.second; - - m_OutputStorage.emplace_back(tensorInfo.GetNumBytes()); - outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem}); - } - m_OutputTensorsVec.emplace_back(outputTensors); - if (m_Params.m_ImportInputsIfAligned) - { - m_ImportedOutputIds.push_back( - m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc)); - } - } - - // Fill the remaining iterations with copies - const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets ; - for (unsigned int i = 1; i <= remainingInputSets; i++) - { - m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]); - if (m_Params.m_ImportInputsIfAligned) - { - m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]); - } - } - - const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets; - for (unsigned int i 
= 1; i <= remainingOutputSets; i++) - { - m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]); - if (m_Params.m_ImportInputsIfAligned) - { - m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]); - } - } -} - -ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::INetwork* network) -{ - struct IOStrategy : armnn::IStrategy - { - void ExecuteStrategy(const armnn::IConnectableLayer* layer, - const armnn::BaseDescriptor& descriptor, - const std::vector& constants, - const char* name, - const armnn::LayerBindingId id = 0) override - { - armnn::IgnoreUnused(descriptor, constants, id); - switch (layer->GetType()) - { - case armnn::LayerType::Input: - { - m_IOInfo.m_InputNames.emplace_back(name); - m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()}; - break; - } - case armnn::LayerType::Output: - { - m_IOInfo.m_OutputNames.emplace_back(name); - m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()}; - break; - } - default:{} - } - } - - IOInfo m_IOInfo; - }; - - IOStrategy ioStrategy; - network->ExecuteStrategy(ioStrategy); - - return ioStrategy.m_IOInfo; -} - -armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network) -{ - armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}}; - - armnn::OptimizerOptions options; - options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode; - options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode; - options.m_Debug = m_Params.m_PrintIntermediate; - options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ? - armnn::ShapeInferenceMethod::InferAndValidate : - armnn::ShapeInferenceMethod::ValidateOnly; - options.m_ProfilingEnabled = m_Params.m_EnableProfiling; - - armnn::BackendOptions gpuAcc("GpuAcc", - { - { "FastMathEnabled", m_Params.m_EnableFastMath }, - { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork }, - { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath }, - { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath } - }); - - armnn::BackendOptions cpuAcc("CpuAcc", - { - { "FastMathEnabled", m_Params.m_EnableFastMath }, - { "NumberOfThreads", m_Params.m_NumberOfThreads } - }); - options.m_ModelOptions.push_back(gpuAcc); - options.m_ModelOptions.push_back(cpuAcc); - - const auto optimization_start_time = armnn::GetTimeNow(); - optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options); - - ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2) - << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n"; - - if (!optNet) - { - LogAndThrow("Optimize returned nullptr"); - } - - return optNet; -} - -std::unique_ptr ArmNNExecutor::CreateParser() -{ - // If no model format is given check the file name - const std::string& modelFormat = m_Params.m_ModelPath; - - m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos ? 
true : false; - std::unique_ptr parser = nullptr; - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { - #if defined(ARMNN_SERIALIZER) - parser = std::make_unique(); - #else - LogAndThrow("Not built with serialization support."); -#endif - } - else if(modelFormat.find("tflite") != std::string::npos) - { - #if defined(ARMNN_TF_LITE_PARSER) - parser = std::make_unique(m_Params); - #else - LogAndThrow("Not built with Tensorflow-Lite parser support."); -#endif - } - else if (modelFormat.find("onnx") != std::string::npos) - { - #if defined(ARMNN_ONNX_PARSER) - parser = std::make_unique(); - #else - LogAndThrow("Not built with Onnx parser support."); - #endif - } - - return parser; -} - -void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors, - unsigned int iteration) -{ - auto findOutputName = [&](const armnn::LayerBindingId id) - { - for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it) - { - if (id == it->second.first) - { - return it->first; - } - } - return std::string{}; - }; - - unsigned int outputIndex = 0; - unsigned int numOutputs = outputTensors->size(); - for (const auto& output: *outputTensors) - { - const auto bindingName = findOutputName(output.first); - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = iteration * numOutputs + outputIndex; - if (!m_Params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '" - << output.first - << "' of iteration: " << iteration+1 << " to file: '" - << m_Params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - - const armnn::Optional outputTensorFile = m_Params.m_OutputTensorFiles.empty() ? 
- armnn::EmptyOptional() : - armnn::MakeOptional( - m_Params.m_OutputTensorFiles[outputFileIndex]); - - OutputWriteInfo outputWriteInfo - { - outputTensorFile, - bindingName, - output.second, - !m_Params.m_DontPrintOutputs - }; - - std::cout << bindingName << ": "; - std::vector values; - switch (output.second.GetDataType()) - { - case armnn::DataType::Float32: - { - PrintTensor(outputWriteInfo, "%f "); - break; - } - - case armnn::DataType::Signed32: - { - PrintTensor(outputWriteInfo, "%d "); - break; - } - case armnn::DataType::QSymmS8: - case armnn::DataType::QAsymmS8: - { - PrintQuantizedTensor(outputWriteInfo); - break; - } - case armnn::DataType::QAsymmU8: - { - PrintQuantizedTensor(outputWriteInfo); - break; - } - case armnn::DataType::Float16: - case armnn::DataType::QSymmS16: - case armnn::DataType::BFloat16: - case armnn::DataType::Boolean: - case armnn::DataType::Signed64: - break; - } - std::cout << "\n"; - } -} - -void ArmNNExecutor::CompareAndPrintResult(std::vector otherOutput) -{ - unsigned int index = 0; - - for (const auto& outputTensors: m_OutputTensorsVec) - { - for (const auto& outputTensor: outputTensors) - { - float result = 0; - size_t size = outputTensor.second.GetNumBytes(); - - switch (outputTensor.second.GetDataType()) - { - case armnn::DataType::Float32: - { - result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); - break; - } - case armnn::DataType::QSymmS16: - { - result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); - break; - } - case armnn::DataType::QSymmS8: - { - result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); - break; - } - case armnn::DataType::QAsymmU8: - case armnn::DataType::QAsymmS8: - { - result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); - break; - } - default: {} - } - - std::cout << "RMSE: of " << result << "\n"; - } - } -} -#if defined(ARMNN_SERIALIZER) -ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){} - -armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams ¶ms) -{ - const std::string &modelPath = params.m_ModelPath; - - std::ifstream file(modelPath, std::ios::binary); - return m_Parser->CreateNetworkFromBinary(file); -} - -armnn::BindingPointInfo -ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string &inputName) -{ - armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName); - return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; -} - -armnn::BindingPointInfo -ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string &outputName) -{ - armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName); - return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; -} -#endif - -#if defined(ARMNN_TF_LITE_PARSER) -ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params) -{ - armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options; - options.m_StandInLayerForUnsupported = params.m_ParseUnsupported; - options.m_InferAndValidate = params.m_InferOutputShape; - - m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options); -} - -armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams ¶ms) -{ - const std::string &modelPath = params.m_ModelPath; - return 
m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str()); -} - -armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId, - const std::string &inputName) -{ - return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName); -} - -armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId, - const std::string &outputName) -{ - return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName); -} -#endif - - -#if defined(ARMNN_ONNX_PARSER) -ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){} -armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams ¶ms) -{ - const std::string &modelPath = params.m_ModelPath; - m_Parser = armnnOnnxParser::IOnnxParser::Create(); - std::map inputShapes; - if(!params.m_InputTensorShapes.empty()) - { - const size_t numInputShapes = params.m_InputTensorShapes.size(); - const size_t numInputBindings = params.m_InputNames.size(); - if(numInputShapes < numInputBindings) - { - throw armnn::Exception( - fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}", - numInputBindings, numInputShapes)); - } - - for (size_t i = 0; i < numInputShapes; i++) - { - inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i]; - } - - return params.m_IsModelBinary ? - m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) : - m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes); - } - - // Handle text and binary input differently by calling the corresponding parser function - return params.m_IsModelBinary ? - m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) : - m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str()); -} - -armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string &inputName) -{ - return m_Parser->GetNetworkInputBindingInfo(inputName); -} - -armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string &outputName) -{ - return m_Parser->GetNetworkOutputBindingInfo(outputName); -} -#endif diff --git a/tests/ExecuteNetwork/ArmNNExecutor.hpp b/tests/ExecuteNetwork/ArmNNExecutor.hpp deleted file mode 100644 index aec7a20a06..0000000000 --- a/tests/ExecuteNetwork/ArmNNExecutor.hpp +++ /dev/null @@ -1,161 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "IExecutor.hpp" -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include "ExecuteNetworkProgramOptions.hpp" -#include "armnn/utility/NumericCast.hpp" -#include "armnn/utility/Timer.hpp" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -#include - -#if defined(ARMNN_SERIALIZER) -#include "armnnDeserializer/IDeserializer.hpp" -#endif -#if defined(ARMNN_TF_LITE_PARSER) -#include -#endif -#if defined(ARMNN_ONNX_PARSER) -#include -#endif - -class ArmNNExecutor : public IExecutor -{ -public: - ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions); - - std::vector Execute() override; - void PrintNetworkInfo() override; - void CompareAndPrintResult(std::vector otherOutput) override; - -private: - - struct IParser; - struct IOInfo; - struct IOStorage; - - using BindingPointInfo = armnn::BindingPointInfo; - - std::unique_ptr CreateParser(); - - void ExecuteAsync(); - void ExecuteSync(); - void SetupInputsAndOutputs(); - - IOInfo GetIOInfo(armnn::INetwork* network); - - void PrintOutputTensors(const armnn::OutputTensors* outputTensors, unsigned int iteration); - - armnn::IOptimizedNetworkPtr OptimizeNetwork(armnn::INetwork* network); - - struct IOStorage - { - IOStorage(size_t size) - { - m_Mem = operator new(size); - } - ~IOStorage() - { - operator delete(m_Mem); - } - IOStorage(IOStorage &&rhs) - { - this->m_Mem = rhs.m_Mem; - rhs.m_Mem = nullptr; - } - - IOStorage(const IOStorage &rhs) = delete; - IOStorage &operator=(IOStorage &rhs) = delete; - IOStorage &operator=(IOStorage &&rhs) = delete; - - void *m_Mem; - }; - - struct IOInfo - { - std::vector m_InputNames; - std::vector m_OutputNames; - std::map m_InputInfoMap; - std::map m_OutputInfoMap; - }; - - IOInfo m_IOInfo; - std::vector m_InputStorage; - std::vector m_OutputStorage; - std::vector m_InputTensorsVec; - std::vector m_OutputTensorsVec; - std::vector> m_ImportedInputIds; - std::vector> m_ImportedOutputIds; - std::shared_ptr m_Runtime; - armnn::NetworkId m_NetworkId; - ExecuteNetworkParams m_Params; - - struct IParser - { - virtual armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) = 0; - virtual armnn::BindingPointInfo GetInputBindingPointInfo(size_t id, const std::string &inputName) = 0; - virtual armnn::BindingPointInfo GetOutputBindingPointInfo(size_t id, const std::string &outputName) = 0; - - virtual ~IParser(){}; - }; - -#if defined(ARMNN_SERIALIZER) - class ArmNNDeserializer : public IParser - { - public: - ArmNNDeserializer(); - - armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams ¶ms) override; - armnn::BindingPointInfo GetInputBindingPointInfo(size_t, const std::string &inputName) override; - armnn::BindingPointInfo GetOutputBindingPointInfo(size_t, const std::string &outputName) override; - - private: - armnnDeserializer::IDeserializerPtr m_Parser; - }; -#endif - -#if defined(ARMNN_TF_LITE_PARSER) - class TfliteParser : public IParser - { - public: - TfliteParser(const ExecuteNetworkParams& params); - - armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams ¶ms) override; - armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string &inputName) override; - armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string &outputName) override; - - private: - armnnTfLiteParser::ITfLiteParserPtr m_Parser{nullptr, [](armnnTfLiteParser::ITfLiteParser*){}}; - }; -#endif - -#if 
defined(ARMNN_ONNX_PARSER) - class OnnxParser : public IParser - { - public: - OnnxParser(); - - armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams ¶ms) override; - armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string &inputName) override; - armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string &outputName) override; - - private: - armnnOnnxParser::IOnnxParserPtr m_Parser; - }; -#endif -}; \ No newline at end of file diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 73cbbb8162..153fe5bcc7 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -1,32 +1,993 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include "ExecuteNetworkProgramOptions.hpp" -#include "ArmNNExecutor.hpp" -#if defined(ARMNN_TF_LITE_DELEGATE) -#include "TfliteExecutor.hpp" -#endif +#include +#include + #include +#include +#include +#include +#include + +#if defined(ARMNN_SERIALIZER) +#include "armnnDeserializer/IDeserializer.hpp" +#endif +#if defined(ARMNN_TF_LITE_PARSER) +#include "armnnTfLiteParser/ITfLiteParser.hpp" +#endif +#if defined(ARMNN_ONNX_PARSER) +#include "armnnOnnxParser/IOnnxParser.hpp" +#endif +#if defined(ARMNN_TFLITE_DELEGATE) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +#include -std::unique_ptr BuildExecutor(ProgramOptions& programOptions) +/** + * Given a measured duration and a threshold time tell the user whether we succeeded or not. + * + * @param duration the measured inference duration. + * @param thresholdTime the threshold time in milliseconds. + * @return false if the measured time exceeded the threshold. 
+ */ +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime) { - if (programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || - programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) + ARMNN_LOG(info) << "Inference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (thresholdTime != 0.0) { -#if defined(ARMNN_TF_LITE_DELEGATE) - return std::make_unique(programOptions.m_ExNetParams); -#else - ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; - return nullptr; -#endif + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << thresholdTime << " ms"; + auto thresholdMinusInference = thresholdTime - duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + return false; + } + } + return true; +} + +#if defined(ARMNN_TFLITE_DELEGATE) +int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions) +{ + // Build model and corresponding interpreter + using namespace tflite; + + std::unique_ptr model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str()); + + auto tfLiteInterpreter = std::make_unique(); + tflite::ops::builtin::BuiltinOpResolver resolver; + + tflite::InterpreterBuilder builder(*model, resolver); + builder(&tfLiteInterpreter); + tfLiteInterpreter->AllocateTensors(); + + int status = 0; + + // Create & populate Armnn Delegate, then register it to TfLiteInterpreter + if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) + { + // Create the Armnn Delegate + // Populate a DelegateOptions from the ExecuteNetworkParams. + armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); + delegateOptions.SetExternalProfilingParams( + arm::pipe::ConvertExternalProfilingOptions(runtimeOptions.m_ProfilingOptions)); + + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + // Register armnn_delegate to TfLiteInterpreter + status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); + if (status != kTfLiteOk) + { + ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!"; + return EXIT_FAILURE; + } } else { - return std::make_unique(programOptions.m_ExNetParams, programOptions.m_RuntimeOptions); + std::cout << "Running on TfLite without ArmNN delegate\n"; } + + const size_t numInputs = params.m_InputNames.size(); + // Populate input tensor of interpreter + for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) + { + // Load (or generate) input data for inference + armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : + armnn::MakeOptional(params.m_InputTensorDataFilePaths[inputIndex]); + + int input = tfLiteInterpreter->inputs()[inputIndex]; + TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims; + + unsigned int inputSize = 1; + if (params.m_InputTensorShapes.size() > 0) + { + inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements(); + } + else + { + for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) + { + inputSize *= inputDims->data[dim]; + } + } + + if (params.m_InputTypes[inputIndex].compare("float") == 0) + { + auto inputData = tfLiteInterpreter->typed_tensor(input); + + if(inputData == NULL) + { + ARMNN_LOG(fatal) << "Input tensor is null, input type: " + "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + std::vector tensorData; + PopulateTensorWithDataGeneric(tensorData, + inputSize, + dataFile, + [](const std::string& s) + { return std::stof(s); }); + + std::copy(tensorData.begin(), tensorData.end(), inputData); + } + else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 || + params.m_InputTypes[inputIndex].compare("qasymms8") == 0) + { + auto inputData = tfLiteInterpreter->typed_tensor(input); + + if(inputData == NULL) + { + ARMNN_LOG(fatal) << "Input tensor is null, input type: " + "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + std::vector tensorData; + PopulateTensorWithDataGeneric(tensorData, + inputSize, + dataFile, + [](const std::string& s) + { return armnn::numeric_cast(std::stoi(s)); }); + + std::copy(tensorData.begin(), tensorData.end(), inputData); + } + else if (params.m_InputTypes[inputIndex].compare("int") == 0) + { + auto inputData = tfLiteInterpreter->typed_tensor(input); + + if(inputData == NULL) + { + ARMNN_LOG(fatal) << "Input tensor is null, input type: " + "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + std::vector tensorData; + PopulateTensorWithDataGeneric(tensorData, + inputSize, + dataFile, + [](const std::string& s) + { return std::stoi(s); }); + + std::copy(tensorData.begin(), tensorData.end(), inputData); + } + else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 || + params.m_InputTypes[inputIndex].compare("qasymmu8") == 0) + { + auto inputData = tfLiteInterpreter->typed_tensor(input); + + if(inputData == NULL) + { + ARMNN_LOG(fatal) << "Input tensor is null, input type: " + "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + std::vector tensorData; + PopulateTensorWithDataGeneric(tensorData, + inputSize, + dataFile, + [](const std::string& s) + { return armnn::numeric_cast(std::stoi(s)); }); + + std::copy(tensorData.begin(), tensorData.end(), inputData); + } + else + { + ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". "; + return EXIT_FAILURE; + } + } + + // Run inference, print the output of the inference + for (size_t x = 0; x < params.m_Iterations; x++) + { + // Start timer to record inference time in milliseconds. + const auto start_time = armnn::GetTimeNow(); + // Run the inference + status = tfLiteInterpreter->Invoke(); + const auto duration = armnn::GetTimeDuration(start_time); + + // The TFLite interpreter's outputs might be in a different order than the user inputted output names. 
+ std::map paramToTfliteOutputIndex; + for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex) + { + paramToTfliteOutputIndex[paramIndex] = -1; + for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex) + { + if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex)) + { + paramToTfliteOutputIndex[paramIndex] = tfLiteIndex; + } + } + } + + // Print out the output + for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex) + { + int outputIndex = paramToTfliteOutputIndex[paramOutputIndex]; + if (outputIndex == -1) + { + std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) << + std::endl; + continue; + } + auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; + TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; + // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. + FILE* outputTensorFile = stdout; + if (!params.m_OutputTensorFiles.empty()) + { + outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); + if (outputTensorFile == NULL) + { + ARMNN_LOG(fatal) << "Specified output tensor file, \"" << + params.m_OutputTensorFiles[outputIndex] << + "\", cannot be created. Defaulting to stdout. " << + "Error was: " << std::strerror(errno); + outputTensorFile = stdout; + } + else + { + ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputIndex] << "'"; + } + } + long outputSize = 1; + for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) + { + outputSize *= outputDims->data[dim]; + } + + std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": "; + if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0) + { + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + if(tfLiteDelageOutputData == NULL) + { + ARMNN_LOG(fatal) << "Output tensor is null, output type: " + "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + if (!params.m_DontPrintOutputs) + { + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); + } + } + } + else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0) + { + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + if(tfLiteDelageOutputData == NULL) + { + ARMNN_LOG(fatal) << "Output tensor is null, output type: " + "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + if (!params.m_DontPrintOutputs) + { + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + } + } + else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 || + params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0) + { + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + if(tfLiteDelageOutputData == NULL) + { + ARMNN_LOG(fatal) << "Output tensor is null, output type: " + "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + if (!params.m_DontPrintOutputs) + { + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + } + } + else if 
(params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 || + params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0) + { + auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + if(tfLiteDelageOutputData == NULL) + { + ARMNN_LOG(fatal) << "Output tensor is null, output type: " + "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; + return EXIT_FAILURE; + } + + if (!params.m_DontPrintOutputs) + { + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); + } + } + } + else + { + ARMNN_LOG(fatal) << "Output tensor is null, output type: " + "\"" << params.m_OutputTypes[paramOutputIndex] << + "\" may be incorrect. Output type can be specified with -z argument"; + return EXIT_FAILURE; + } + std::cout << std::endl; + } + CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); + } + + return status; +} +#endif +template +int MainImpl(const ExecuteNetworkParams& params, + const std::shared_ptr& runtime = nullptr) +{ + using namespace std::chrono; + + std::vector> inputs; + std::vector> outputs; + + try + { + // Creates an InferenceModel, which will parse the model and load it into an IRuntime. + typename InferenceModel::Params inferenceModelParams; + inferenceModelParams.m_ModelPath = params.m_ModelPath; + inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims; + inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary; + inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices; + inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath; + inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate; + inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails; + inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported; + inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape; + inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath; + inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork; + inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath; + inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads; + inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath; + inferenceModelParams.m_AsyncEnabled = params.m_Concurrent; + inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize; + inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut; + inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut; + inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned; + + for(const std::string& inputName: params.m_InputNames) + { + inferenceModelParams.m_InputBindings.push_back(inputName); + } + + for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i) + { + inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]); + } + + for(const std::string& outputName: params.m_OutputNames) + { + inferenceModelParams.m_OutputBindings.push_back(outputName); + } + + inferenceModelParams.m_SubgraphId = params.m_SubgraphId; + inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode; + inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode; + + InferenceModel model(inferenceModelParams, + params.m_EnableProfiling, + params.m_DynamicBackendsPath, + runtime); + + const size_t numInputs = inferenceModelParams.m_InputBindings.size(); + + armnn::Optional qParams = 
params.m_QuantizeInput ? + armnn::MakeOptional( + model.GetInputQuantizationParams()) : + armnn::EmptyOptional(); + + if (params.m_InputTensorDataFilePaths.size() > numInputs) + { + ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required " + << "for each input. The user provided " + << params.m_InputTensorDataFilePaths.size() + << " input-tensor-data file/s which will be used to fill the input/s.\n"; + } + + const size_t numOutputs = inferenceModelParams.m_OutputBindings.size(); + + // The user is allowed to specify the data type of each output tensor. It is used here to construct the + // result tensors for each iteration. It is possible for the user to specify a type that does not match + // the data type of the corresponding model output. It may not make sense, but it is historically allowed. + // The potential problem here is a buffer overrun when a larger data type is written into the space for a + // smaller one. Issue a warning to highlight the potential problem. + for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx) + { + armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType(); + switch (type) + { + // --output-type only supports float, int, qasymms8 or qasymmu8. + case armnn::DataType::Float32: + if (params.m_OutputTypes[outputIdx].compare("float") != 0) + { + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ". This may cause unexpected problems or random failures."; + } + break; + case armnn::DataType::QAsymmU8: + if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0) + { + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ". This may cause unexpected problems or random failures."; + } + break; + case armnn::DataType::Signed32: + if (params.m_OutputTypes[outputIdx].compare("int") != 0) + { + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ". This may cause unexpected problems or random failures."; + } + break; + case armnn::DataType::QAsymmS8: + if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0) + { + ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " + << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << + ". This may cause unexpected problems or random failures."; + } + break; + default: + break; + } + } + + if (!params.m_ReuseBuffers) + { + for (unsigned int j = 0; j < params.m_Iterations; ++j) + { + std::vector inputDataContainers; + for (unsigned int i = 0; i < numInputs; ++i) + { + // If there are fewer input files given than required for the execution of + // params.m_Iterations we simply start with the first input file again + size_t inputFileIndex = j * numInputs + i; + if (!params.m_InputTensorDataFilePaths.empty()) + { + inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); + } + + armnn::Optional dataFile = params.m_GenerateTensorData ? 
+ armnn::EmptyOptional() : + armnn::MakeOptional( + params.m_InputTensorDataFilePaths.at( + inputFileIndex)); + + unsigned int numElements = model.GetInputSize(i); + if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) + { + // If the user has provided a tensor shape for the current input, + // override numElements + numElements = params.m_InputTensorShapes[i]->GetNumElements(); + } + + armnnUtils::TContainer tensorData; + PopulateTensorWithData(tensorData, + numElements, + params.m_InputTypes[i], + qParams, + dataFile); + + inputDataContainers.push_back(tensorData); + } + inputs.push_back(inputDataContainers); + } + + for (unsigned int j = 0; j < params.m_Iterations; ++j) + { + std::vector outputDataContainers; + for (unsigned int i = 0; i < numOutputs; ++i) + { + if (params.m_OutputTypes[i].compare("float") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("int") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || + params.m_OutputTypes[i].compare("qasymmu8") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else if (params.m_OutputTypes[i].compare("qasymms8") == 0) + { + outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); + } + else + { + ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; + return EXIT_FAILURE; + } + } + outputs.push_back(outputDataContainers); + } + } + if (params.m_Iterations > 1) + { + std::stringstream msg; + msg << "Network will be executed " << params.m_Iterations; + if (params.m_Concurrent) + { + msg << " times in an asynchronous manner. "; + } + else + { + msg << " times successively. "; + } + msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " + "cover each execution."; + ARMNN_LOG(info) << msg.str(); + } + + // Synchronous execution + if (!params.m_Concurrent && !params.m_ReuseBuffers) + { + for (size_t x = 0; x < params.m_Iterations; x++) + { + // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) + auto inference_duration = model.Run(inputs[x], outputs[x]); + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + if (params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo& infoOut = infosOut[i].second; + + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = x * numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] + << "' of iteration: " << x+1 << " to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? 
"" + : params.m_OutputTensorFiles[outputFileIndex]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput, + !params.m_DontPrintOutputs); + mapbox::util::apply_visitor(printer, outputs[x][i]); + } + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference_duration.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + } + } + } + } + // Synchronous Execution using a single buffer for input and output data + else if(!params.m_Concurrent) + { + std::vector input; + std::vector output; + + for (unsigned int i = 0; i < numInputs; ++i) + { + // If there are fewer input files given than required for the execution of + // params.m_Iterations we simply start with the first input file again + size_t inputFileIndex = numInputs + i; + if (!params.m_InputTensorDataFilePaths.empty()) + { + inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); + } + + armnn::Optional dataFile = params.m_GenerateTensorData ? + armnn::EmptyOptional() : + armnn::MakeOptional( + params.m_InputTensorDataFilePaths.at( + inputFileIndex)); + + unsigned int numElements = model.GetInputSize(i); + if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) + { + // If the user has provided a tensor shape for the current input, + // override numElements + numElements = params.m_InputTensorShapes[i]->GetNumElements(); + } + + armnnUtils::TContainer tensorData; + PopulateTensorWithData(tensorData, + numElements, + params.m_InputTypes[i], + qParams, + dataFile); + + input.push_back(tensorData); + } + + for (unsigned int i = 0; i < numOutputs; ++i) + { + if (params.m_OutputTypes[i].compare("float") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("int") == 0) { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || + params.m_OutputTypes[i].compare("qasymmu8") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else if (params.m_OutputTypes[i].compare("qasymms8") == 0) + { + output.push_back(std::vector(model.GetOutputSize(i))); + } else { + ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; + return EXIT_FAILURE; + } + } + + std::vector> timings; + timings.reserve(params.m_Iterations); + for (size_t x = 0; x < params.m_Iterations; x++) + { + // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) + auto inference_duration = model.Run(input, output); + timings.push_back(inference_duration); + } + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + if (params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } + + // Print output. 
This only needs to happen once as input is the same for each iteration. + const auto &infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + const armnn::TensorInfo &infoOut = infosOut[i].second; + + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] <<" to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? "" + : params.m_OutputTensorFiles[outputFileIndex]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput, + !params.m_DontPrintOutputs); + mapbox::util::apply_visitor(printer, output[i]); + } + + for(auto inference: timings) + { + + ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) + << std::fixed << inference.count() << " ms\n"; + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (params.m_ThresholdTime != 0.0) + { + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << params.m_ThresholdTime << " ms"; + auto thresholdMinusInference = params.m_ThresholdTime - inference.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + + if (thresholdMinusInference < 0) + { + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + } + } + } + } + + // Asynchronous execution using the Arm NN thread pool + else if (params.m_ThreadPoolSize >= 1) + { + try + { + ARMNN_LOG(info) << "Asynchronous execution with Arm NN thread pool... 
\n"; + armnn::AsyncCallbackManager callbackManager; + std::unordered_map&> inferenceOutputMap; + + // Declare the latest and earliest inference times here to be used when calculating overall time + std::chrono::high_resolution_clock::time_point earliestStartTime; + std::chrono::high_resolution_clock::time_point latestEndTime = + std::chrono::high_resolution_clock::now(); + + // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the + // LoadedNetwork with each scheduled inference having a specific priority + for (size_t i = 0; i < params.m_Iterations; ++i) + { + std::shared_ptr cb = callbackManager.GetNewCallback(); + inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); + model.RunAsync(inputs[i], outputs[i], cb); + } + + // Check the results + unsigned int j = 0; + for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) + { + auto cb = callbackManager.GetNotifiedCallback(); + + // Get the results + auto endTime = time_point_cast(cb->GetEndTime()); + auto startTime = time_point_cast(cb->GetStartTime()); + auto inferenceDuration = endTime - startTime; + + if (latestEndTime < cb->GetEndTime()) + { + latestEndTime = cb->GetEndTime(); + } + + if (earliestStartTime.time_since_epoch().count() == 0) + { + earliestStartTime = cb->GetStartTime(); + } + else if (earliestStartTime > cb->GetStartTime()) + { + earliestStartTime = cb->GetStartTime(); + } + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + if (params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the + // result of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = iteration * numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] + << "' of iteration: " << iteration+1 << " to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? 
"" + : params.m_OutputTensorFiles[outputFileIndex]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput, + !params.m_DontPrintOutputs); + mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); + } + + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); + ++j; + } + //print duration difference between overallStartTime and overallEndTime + auto overallEndTime = time_point_cast(latestEndTime); + auto overallStartTime = time_point_cast(earliestStartTime); + auto totalInferenceDuration = overallEndTime - overallStartTime; + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << totalInferenceDuration.count() << " ms\n"; + } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + } + // Asynchronous execution using std::launch::async + else + { + try + { + ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; + std::vector>>> inferenceResults; + inferenceResults.reserve(params.m_Iterations); + + // Create WorkingMemHandles for each inference + std::vector> workingMemHandles; + workingMemHandles.reserve(params.m_Iterations); + for (unsigned int i = 0; i < params.m_Iterations; ++i) + { + workingMemHandles.push_back(model.CreateWorkingMemHandle()); + } + + // Run each inference in its own thread + // start a timer + const auto start_time = armnn::GetTimeNow(); + for (unsigned int i = 0; i < params.m_Iterations; ++i) + { + armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); + + inferenceResults.push_back(std::async( + std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() { + return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i); + } + )); + } + + // Check the results + for (unsigned int j = 0; j < inferenceResults.size(); ++j) + { + // Get the results + auto inferenceResult = inferenceResults[j].get(); + auto inferenceDuration = std::get<1>(inferenceResult); + auto inferenceID = std::get<0>(inferenceResult); + + if (params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + if (params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } + + // Print output tensors + const auto& infosOut = model.GetOutputBindingInfos(); + for (size_t i = 0; i < numOutputs; i++) + { + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the + // result of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = j * numOutputs + i; + if (!params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output " << i << " named: '" + << inferenceModelParams.m_OutputBindings[i] + << "' of iteration: " << j+1 << " to file: '" + << params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + const armnn::TensorInfo& infoOut = infosOut[i].second; + auto outputTensorFile = params.m_OutputTensorFiles.empty() + ? 
"" + : params.m_OutputTensorFiles[outputFileIndex]; + + TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], + infoOut, + outputTensorFile, + params.m_DequantizeOutput, + !params.m_DontPrintOutputs); + mapbox::util::apply_visitor(printer, outputs[j][i]); + } + CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); + ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; + } + // finish timer + const auto duration = armnn::GetTimeDuration(start_time); + ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + } + } + catch (const armnn::Exception& e) + { + ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); + return EXIT_FAILURE; + } + + return EXIT_SUCCESS; } // MAIN @@ -43,49 +1004,74 @@ int main(int argc, const char* argv[]) // Get ExecuteNetwork parameters and runtime options from command line // This might throw an InvalidArgumentException if the user provided invalid inputs - ProgramOptions programOptions; - try - { - programOptions.ParseOptions(argc, argv); - } - catch (const std::exception &e) - { + ProgramOptions ProgramOptions; + try { + ProgramOptions.ParseOptions(argc, argv); + } catch (const std::exception &e){ ARMNN_LOG(fatal) << e.what(); return EXIT_FAILURE; } - std::vector outputResults; - - auto executor = BuildExecutor(programOptions); - if (!executor) + if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut || + ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut) + && !ProgramOptions.m_ExNetParams.m_EnableProfiling) { + ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details"; return EXIT_FAILURE; } - executor->PrintNetworkInfo(); - outputResults = executor->Execute(); + std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat; - if (!programOptions.m_ExNetParams.m_ComparisonComputeDevices.empty() || - programOptions.m_ExNetParams.m_CompareWithTflite) + // Forward to implementation based on the parser type + if (modelFormat.find("armnn") != std::string::npos) { - ExecuteNetworkParams comparisonParams = programOptions.m_ExNetParams; - comparisonParams.m_ComputeDevices = programOptions.m_ExNetParams.m_ComparisonComputeDevices; - - if (programOptions.m_ExNetParams.m_CompareWithTflite) + #if defined(ARMNN_SERIALIZER) + std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); + return MainImpl(ProgramOptions.m_ExNetParams, runtime); + #else + ARMNN_LOG(fatal) << "Not built with serialization support."; + return EXIT_FAILURE; + #endif + } + else if (modelFormat.find("onnx") != std::string::npos) + { + #if defined(ARMNN_ONNX_PARSER) + std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); + return MainImpl(ProgramOptions.m_ExNetParams, runtime); + #else + ARMNN_LOG(fatal) << "Not built with Onnx parser support."; + return EXIT_FAILURE; + #endif + } + else if(modelFormat.find("tflite") != std::string::npos) + { + if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser) { - comparisonParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter; + #if defined(ARMNN_TF_LITE_PARSER) + std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); + return MainImpl(ProgramOptions.m_ExNetParams, runtime); + #else + ARMNN_LOG(fatal) << 
"Not built with Tensorflow-Lite parser support."; + return EXIT_FAILURE; + #endif } - - auto comparisonExecutor = BuildExecutor(programOptions); - - if (!comparisonExecutor) + else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == + ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || + ProgramOptions.m_ExNetParams.m_TfLiteExecutor == + ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { + #if defined(ARMNN_TF_LITE_DELEGATE) + return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions); + #else + ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; + #endif } - - comparisonExecutor->PrintNetworkInfo(); - comparisonExecutor->Execute(); - - comparisonExecutor->CompareAndPrintResult(outputResults); + } + else + { + ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat + << "'. Please include 'tflite' or 'onnx'"; + return EXIT_FAILURE; } } diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index f341c30738..cc75bb4323 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -1,15 +1,76 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkParams.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include #include #include -#include + +bool IsModelBinary(const std::string& modelFormat) +{ + // Parse model binary flag from the model-format string we got from the command-line + if (modelFormat.find("binary") != std::string::npos) + { + return true; + } + else if (modelFormat.find("txt") != std::string::npos || modelFormat.find("text") != std::string::npos) + { + return false; + } + else + { + throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. " + "Please include 'binary' or 'text'", + modelFormat)); + } +} + +void CheckModelFormat(const std::string& modelFormat) +{ + // Forward to implementation based on the parser type + if (modelFormat.find("armnn") != std::string::npos) + { +#if defined(ARMNN_SERIALIZER) +#else + throw armnn::InvalidArgumentException("Can't run model in armnn format without a " + "built with serialization support."); +#endif + } + else if (modelFormat.find("onnx") != std::string::npos) + { +#if defined(ARMNN_ONNX_PARSER) +#else + throw armnn::InvalidArgumentException("Can't run model in onnx format without a " + "built with Onnx parser support."); +#endif + } + else if (modelFormat.find("tflite") != std::string::npos) + { +#if defined(ARMNN_TF_LITE_PARSER) + if (!IsModelBinary(modelFormat)) + { + throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. Only 'binary' " + "format supported for tflite files", + modelFormat)); + } +#elif defined(ARMNN_TFLITE_DELEGATE) +#else + throw armnn::InvalidArgumentException("Can't run model in tflite format without a " + "built with Tensorflow Lite parser support."); +#endif + } + else + { + throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. 
" + "Please include 'tflite' or 'onnx'", + modelFormat)); + } +} void CheckClTuningParameter(const int& tuningLevel, const std::string& tuningPath, @@ -44,6 +105,7 @@ void CheckClTuningParameter(const int& tuningLevel, ARMNN_LOG(warning) << "To use Cl Tuning the compute device GpuAcc needs to be active."; } } + } void ExecuteNetworkParams::ValidateParams() @@ -58,6 +120,7 @@ void ExecuteNetworkParams::ValidateParams() << invalidBackends; } } + CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) @@ -66,6 +129,10 @@ void ExecuteNetworkParams::ValidateParams() "enabled at the same time."); } + m_IsModelBinary = IsModelBinary(m_ModelFormat); + + CheckModelFormat(m_ModelFormat); + // Check input tensor shapes if ((m_InputTensorShapes.size() != 0) && (m_InputTensorShapes.size() != m_InputNames.size())) @@ -90,6 +157,68 @@ void ExecuteNetworkParams::ValidateParams() m_InputNames.size(), m_InputTensorDataFilePaths.size())); } + else if (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0) + { + throw armnn::InvalidArgumentException( + fmt::format("According to the number of input names the user provided the network has {} " + "inputs. The user specified {} input-tensor-data file paths which is not " + "divisible by the number of inputs.", + m_InputNames.size(), + m_InputTensorDataFilePaths.size())); + } + } + + if (m_InputTypes.size() == 0) + { + //Defaults the value of all inputs to "float" + m_InputTypes.assign(m_InputNames.size(), "float"); + } + else if ((m_InputTypes.size() != 0) && + (m_InputTypes.size() != m_InputNames.size())) + { + throw armnn::InvalidArgumentException("input-name and input-type must have the same amount of elements."); + } + + // Make sure that the number of input files given is divisible by the number of inputs of the model + if (!(m_InputTensorDataFilePaths.size() % m_InputNames.size() == 0)) + { + throw armnn::InvalidArgumentException( + fmt::format("The number of input-tensor-data files ({0}) is not divisible by the " + "number of inputs ({1} according to the number of input names).", + m_InputTensorDataFilePaths.size(), + m_InputNames.size())); + } + + if (m_OutputTypes.size() == 0) + { + //Defaults the value of all outputs to "float" + m_OutputTypes.assign(m_OutputNames.size(), "float"); + } + else if ((m_OutputTypes.size() != 0) && + (m_OutputTypes.size() != m_OutputNames.size())) + { + throw armnn::InvalidArgumentException("output-name and output-type must have the same amount of elements."); + } + + // Make sure that the number of output files given is equal to the number of outputs of the model + // or equal to the number of outputs of the model multiplied with the number of iterations + if (!m_OutputTensorFiles.empty()) + { + if ((m_OutputTensorFiles.size() != m_OutputNames.size()) && + (m_OutputTensorFiles.size() != m_OutputNames.size() * m_Iterations)) + { + std::stringstream errmsg; + auto numOutputs = m_OutputNames.size(); + throw armnn::InvalidArgumentException( + fmt::format("The user provided {0} output-tensor files. 
The only allowed number of output-tensor " + "files is the number of outputs of the network ({1} according to the number of " + "output names) or the number of outputs multiplied with the number of times the " + "network should be executed (NumOutputs * NumIterations = {1} * {2} = {3}).", + m_OutputTensorFiles.size(), + numOutputs, + m_Iterations, + numOutputs*m_Iterations)); + } } // Check that threshold time is not less than zero @@ -181,5 +310,4 @@ armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const return delegateOptions; } - #endif diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index 104c1c50c2..5ef2b6ea7c 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -16,6 +16,8 @@ /// Check ExecuteNetworkProgramOptions.cpp for a description of each parameter struct ExecuteNetworkParams { + using TensorShapePtr = std::unique_ptr; + enum class TfLiteExecutor { ArmNNTfLiteParser, @@ -23,49 +25,50 @@ struct ExecuteNetworkParams TfliteInterpreter }; - bool m_AllowExpandedDims; - std::string m_CachedNetworkFilePath; - std::vector m_ComputeDevices; - bool m_Concurrent; - bool m_DequantizeOutput; - std::string m_DynamicBackendsPath; - bool m_EnableBf16TurboMode; - bool m_EnableFastMath = false; - bool m_EnableFp16TurboMode; - bool m_EnableLayerDetails = false; - bool m_EnableProfiling; - bool m_GenerateTensorData; - bool m_InferOutputShape = false; - bool m_EnableDelegate = false; - bool m_IsModelBinary; - std::vector m_InputNames; - std::vector m_InputTensorDataFilePaths; - std::vector m_InputTensorShapes; - size_t m_Iterations; - std::string m_ModelPath; - unsigned int m_NumberOfThreads; - bool m_OutputDetailsToStdOut; - bool m_OutputDetailsOnlyToStdOut; - std::vector m_OutputNames; - std::vector m_OutputTensorFiles; - bool m_ParseUnsupported = false; - bool m_PrintIntermediate; - bool m_DontPrintOutputs; - bool m_QuantizeInput; - bool m_SaveCachedNetwork; - size_t m_SubgraphId; - double m_ThresholdTime; - int m_TuningLevel; - std::string m_TuningPath; - std::string m_MLGOTuningFilePath; - TfLiteExecutor m_TfLiteExecutor; - size_t m_ThreadPoolSize; - bool m_ImportInputsIfAligned; - bool m_ReuseBuffers; + bool m_AllowExpandedDims; + std::string m_CachedNetworkFilePath; + std::vector m_ComputeDevices; + bool m_Concurrent; + bool m_DequantizeOutput; + std::string m_DynamicBackendsPath; + bool m_EnableBf16TurboMode; + bool m_EnableFastMath = false; + bool m_EnableFp16TurboMode; + bool m_EnableLayerDetails = false; + bool m_EnableProfiling; + bool m_GenerateTensorData; + bool m_InferOutputShape = false; + bool m_EnableDelegate = false; + std::vector m_InputNames; + std::vector m_InputTensorDataFilePaths; + std::vector m_InputTensorShapes; + std::vector m_InputTypes; + bool m_IsModelBinary; + size_t m_Iterations; + std::string m_ModelFormat; + std::string m_ModelPath; + unsigned int m_NumberOfThreads; + bool m_OutputDetailsToStdOut; + bool m_OutputDetailsOnlyToStdOut; + std::vector m_OutputNames; + std::vector m_OutputTensorFiles; + std::vector m_OutputTypes; + bool m_ParseUnsupported = false; + bool m_PrintIntermediate; + bool m_DontPrintOutputs; + bool m_QuantizeInput; + bool m_SaveCachedNetwork; + size_t m_SimultaneousIterations; + size_t m_SubgraphId; + double 
m_ThresholdTime; + int m_TuningLevel; + std::string m_TuningPath; + std::string m_MLGOTuningFilePath; + TfLiteExecutor m_TfLiteExecutor; + size_t m_ThreadPoolSize; + bool m_ImportInputsIfAligned; + bool m_ReuseBuffers; - std::string m_ComparisonFile; - std::vector m_ComparisonComputeDevices; - bool m_CompareWithTflite; // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index da6200b8a9..ad35092c1d 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -1,10 +1,11 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkProgramOptions.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include "InferenceTest.hpp" #include #include @@ -50,6 +51,8 @@ void CheckOptionDependency(const cxxopts::ParseResult& result, void CheckOptionDependencies(const cxxopts::ParseResult& result) { + CheckOptionDependency(result, "model-path", "model-format"); + CheckOptionDependency(result, "input-tensor-shape", "model-path"); CheckOptionDependency(result, "tuning-level", "tuning-path"); } @@ -116,8 +119,10 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result) // For each option in option-group "a) Required std::vector requiredOptions{"compute", - "model-path" - }; + "model-format", + "model-path", + "input-name", + "output-name"}; bool requiredMissing = false; for(auto const& str : requiredOptions) @@ -139,39 +144,13 @@ void CheckForDeprecatedOptions(const cxxopts::ParseResult& result) if(result.count("simultaneous-iterations") > 0) { ARMNN_LOG(warning) << "DEPRECATED: The program option 'simultaneous-iterations' is deprecated and will be " - "removed soon. Please use the option '\"P, enable-thread-pool\"' instead."; + "removed soon. Please use the option 'iterations' combined with 'concurrent' instead."; } if(result.count("armnn-tflite-delegate") > 0) { ARMNN_LOG(warning) << "DEPRECATED: The program option 'armnn-tflite-delegate' is deprecated and will be " "removed soon. Please use the option 'tflite-executor' instead."; } - if(result.count("concurrent") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'concurrent' is deprecated and will be " - "removed soon. Please use the option '\"P, enable-thread-pool\"' instead."; - } - if(result.count("input-type") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-type' is deprecated and will be " - "removed soon. The input-types are now automatically set."; - } - if(result.count("output-type") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-type' is deprecated and will be " - "removed soon. The output-types are now automatically set."; - } - if(result.count("output-name") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-name' is deprecated and will be " - "removed soon. The output-names are now automatically set."; - } - if(result.count("model-format") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-name' is deprecated and will be " - "removed soon. 
The model-format are now automatically set."; - } - } void ProgramOptions::ValidateExecuteNetworkParams() @@ -208,9 +187,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value>()) ("f,model-format", - "armnn-binary, onnx-binary, onnx-text, tflite-binary" - "DEPRECATED: The program option 'input-name' is deprecated and will be " - "removed soon. The model-format are now automatically set.", + "armnn-binary, onnx-binary, onnx-text, tflite-binary", cxxopts::value()) ("m,model-path", @@ -218,13 +195,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_ModelPath)) ("i,input-name", - "Identifier of the input tensors in the network separated by comma." - "This option is not required, but can be used to set the order of inputs", + "Identifier of the input tensors in the network separated by comma.", cxxopts::value()) ("o,output-name", - "Identifier of the output tensors in the network separated by comma." - "This option is not required, but can be used to set the order of outputs", + "Identifier of the output tensors in the network separated by comma.", cxxopts::value()); m_CxxOptions.add_options("b) General") @@ -233,16 +208,10 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If left empty (the default), dynamic backends will not be used.", cxxopts::value(m_RuntimeOptions.m_DynamicBackendsPath)) - ("P, thread-pool-size", - "Run the network using the Arm NN thread pool with the number of threads provided. ", - cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")) - ("n,concurrent", "This option is for Arm NN internal asynchronous testing purposes. " "False by default. If set to true will use std::launch::async or the Arm NN thread pool, " - "if 'thread-pool-size' is greater than 0, for asynchronous execution." - "DEPRECATED: The program option 'concurrent' is deprecated and will be " - "removed soon. Please use the option '\"P, enable-thread-pool\"' instead.", + "if 'thread-pool-size' is greater than 0, for asynchronous execution.", cxxopts::value(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true")) ("d,input-tensor-data", @@ -266,7 +235,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_AllowExpandedDims)->default_value("false") ->implicit_value("true")) - ("I,iterations", + ("iterations", "Number of iterations to run the network for, default is set to 1. " "If you wish to run the model with different input data for every execution you can do so by " "supplying more input file paths to the 'input-tensor-data' option. " @@ -303,7 +272,6 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If unset, default to not quantized. Accepted values (true or false)" " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_QuantizeInput)->default_value("false")->implicit_value("true")) - ("r,threshold-time", "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual " "inference time is greater than the threshold time, the test will fail. By default, no threshold " @@ -333,17 +301,13 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("y,input-type", "The type of the input tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined inputs. " - "Accepted values (float, int, qasymms8 or qasymmu8)." 
- "DEPRECATED: The program option 'input-type' is deprecated and will be " - "removed soon. The input-types are now automatically set.", + "Accepted values (float, int, qasymms8 or qasymmu8).", cxxopts::value()) ("z,output-type", "The type of the output tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined outputs. " - "Accepted values (float, int, qasymms8 or qasymmu8)." - "DEPRECATED: The program option 'output-type' is deprecated and will be " - "removed soon. The input-types are now automatically set.", + "Accepted values (float, int, qasymms8 or qasymmu8).", cxxopts::value()) ("T,tflite-executor", @@ -353,21 +317,23 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "tflite is the TfliteInterpreter", cxxopts::value()->default_value("parser")) - ("C, compare-output", - "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. " - "The default is set to 0 which equals disabled. If 'thread-pool-size' is greater than 0 the " - "'concurrent' option is automatically set to true.", - cxxopts::value(m_ExNetParams.m_ComparisonFile)) + ("D,armnn-tflite-delegate", + "Enable Arm NN TfLite delegate. " + "DEPRECATED: This option is deprecated please use tflite-executor instead", + cxxopts::value(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true")) - ("B, compare-output-with-backend", - "Compare the output of the network with a different backend.", - cxxopts::value>()) - - ("A, compare-with-tflite", - "Compare the outout of the network with the tflite ref model.", - cxxopts::value(m_ExNetParams.m_CompareWithTflite)->default_value("false") - ->implicit_value("true")); + ("simultaneous-iterations", + "Number of simultaneous iterations to async-run the network for, default is set to 1 (disabled). " + "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used." + "DEPRECATED: This option is deprecated and will be removed soon. " + "Please use the option 'iterations' combined with 'concurrent' instead.", + cxxopts::value(m_ExNetParams.m_SimultaneousIterations)->default_value("1")) + ("thread-pool-size", + "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. " + "The default is set to 0 which equals disabled. 
If 'thread-pool-size' is greater than 0 the " + "'concurrent' option is automatically set to true.", + cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")); m_CxxOptions.add_options("c) Optimization") ("bf16-turbo-mode", @@ -503,22 +469,21 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) CheckOptionDependencies(m_CxxResult); CheckForDeprecatedOptions(m_CxxResult); - if ((m_ExNetParams.m_OutputDetailsToStdOut || - m_ExNetParams.m_OutputDetailsOnlyToStdOut) && - !m_ExNetParams.m_EnableProfiling) - { - throw cxxopts::OptionParseException("You must enable profiling if you would like to output layer details"); - } - // Some options can't be assigned directly because they need some post-processing: auto computeDevices = GetOptionValue>("compute", m_CxxResult); m_ExNetParams.m_ComputeDevices = GetBackendIDs(computeDevices); + m_ExNetParams.m_ModelFormat = + armnn::stringUtils::StringTrimCopy(GetOptionValue("model-format", m_CxxResult)); m_ExNetParams.m_InputNames = ParseStringList(GetOptionValue("input-name", m_CxxResult), ","); m_ExNetParams.m_InputTensorDataFilePaths = ParseStringList(GetOptionValue("input-tensor-data", m_CxxResult), ","); m_ExNetParams.m_OutputNames = ParseStringList(GetOptionValue("output-name", m_CxxResult), ","); + m_ExNetParams.m_InputTypes = + ParseStringList(GetOptionValue("input-type", m_CxxResult), ","); + m_ExNetParams.m_OutputTypes = + ParseStringList(GetOptionValue("output-type", m_CxxResult), ","); m_ExNetParams.m_OutputTensorFiles = ParseStringList(GetOptionValue("write-outputs-to-file", m_CxxResult), ","); m_ExNetParams.m_GenerateTensorData = @@ -552,13 +517,13 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) { m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate; } - - // Set concurrent to true if the user expects to run inferences asynchronously - if (m_ExNetParams.m_Concurrent) + if (m_ExNetParams.m_SimultaneousIterations > 1) { - m_ExNetParams.m_ThreadPoolSize = 1; + m_ExNetParams.m_Iterations = m_ExNetParams.m_SimultaneousIterations; + m_ExNetParams.m_Concurrent = true; } + // Set concurrent to true if the user expects to run inferences asynchronously if (m_ExNetParams.m_ThreadPoolSize > 0) { m_ExNetParams.m_Concurrent = true; @@ -578,7 +543,7 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) std::vector dims = ParseArray(ss); m_ExNetParams.m_InputTensorShapes.push_back( - armnn::TensorShape{static_cast(dims.size()), dims.data()}); + std::make_unique(static_cast(dims.size()), dims.data())); } } @@ -603,12 +568,5 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) } ValidateRuntimeOptions(); - - auto comparisonComputDevices = GetOptionValue>("compare-output-with-backend", m_CxxResult); - - if (!comparisonComputDevices.empty()) - { - m_ExNetParams.m_ComparisonComputeDevices = GetBackendIDs(comparisonComputDevices); - } } diff --git a/tests/ExecuteNetwork/IExecutor.hpp b/tests/ExecuteNetwork/IExecutor.hpp deleted file mode 100644 index 4ed6cbde84..0000000000 --- a/tests/ExecuteNetwork/IExecutor.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once -#include - -/// IExecutor executes a network -class IExecutor -{ -public: - /// Execute the given network - /// @return std::vector A type erased vector of the outputs, - /// that can be compared with the output of another IExecutor - virtual std::vector Execute() = 0; - /// Print available information about the network - virtual void PrintNetworkInfo() = 0; - /// Compare the output with the result of another IExecutor - virtual void CompareAndPrintResult(std::vector otherOutput) = 0; - virtual ~IExecutor(){}; -}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.cpp b/tests/ExecuteNetwork/TfliteExecutor.cpp deleted file mode 100644 index f7a3068d7b..0000000000 --- a/tests/ExecuteNetwork/TfliteExecutor.cpp +++ /dev/null @@ -1,251 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "TfliteExecutor.hpp" - -TfLiteExecutor::TfLiteExecutor(const ExecuteNetworkParams& params) : m_Params(params) -{ - std::unique_ptr model = - tflite::FlatBufferModel::BuildFromFile(m_Params.m_ModelPath.c_str()); - - m_TfLiteInterpreter = std::make_unique(); - tflite::ops::builtin::BuiltinOpResolver resolver; - - tflite::InterpreterBuilder builder(*model, resolver); - builder(&m_TfLiteInterpreter); - m_TfLiteInterpreter->AllocateTensors(); - - int status; - if (m_Params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) - { - // Create the Armnn Delegate - // Populate a DelegateOptions from the ExecuteNetworkParams. - armnnDelegate::DelegateOptions delegateOptions = m_Params.ToDelegateOptions(); - delegateOptions.SetExternalProfilingParams(delegateOptions.GetExternalProfilingParams()); - - std::unique_ptr - theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), - armnnDelegate::TfLiteArmnnDelegateDelete); - // Register armnn_delegate to TfLiteInterpreter - status = m_TfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); - if (status == kTfLiteError) - { - LogAndThrow("Could not register ArmNN TfLite Delegate to TfLiteInterpreter"); - } - } - else - { - std::cout << "Running on TfLite without ArmNN delegate\n"; - } - - armnn::Optional dataFile = m_Params.m_GenerateTensorData - ? 
armnn::EmptyOptional() - : armnn::MakeOptional(m_Params.m_InputTensorDataFilePaths[0]); - - const size_t numInputs = m_Params.m_InputNames.size(); - - for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) - { - int input = m_TfLiteInterpreter->inputs()[inputIndex]; - - TfLiteIntArray* inputDims = m_TfLiteInterpreter->tensor(input)->dims; - - unsigned int inputSize = 1; - for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) - { - inputSize *= inputDims->data[dim]; - } - - const auto& inputName = m_TfLiteInterpreter->input_tensor(input)->name; - const auto& dataType = m_TfLiteInterpreter->input_tensor(input)->type; - - switch (dataType) - { - case kTfLiteFloat32: - { - auto inputData = m_TfLiteInterpreter->typed_tensor(input); - PopulateTensorWithData(inputData, inputSize, dataFile, inputName); - break; - } - case kTfLiteInt32: - { - auto inputData = m_TfLiteInterpreter->typed_tensor(input); - PopulateTensorWithData(inputData, inputSize, dataFile, inputName); - break; - } - case kTfLiteUInt8: - { - auto inputData = m_TfLiteInterpreter->typed_tensor(input); - PopulateTensorWithData(inputData, inputSize, dataFile, inputName); - break; - } - case kTfLiteInt16: - { - auto inputData = m_TfLiteInterpreter->typed_tensor(input); - PopulateTensorWithData(inputData, inputSize, dataFile, inputName); - break; - } - case kTfLiteInt8: - { - auto inputData = m_TfLiteInterpreter->typed_tensor(input); - PopulateTensorWithData(inputData, inputSize, dataFile, inputName); - break; - } - default: - { - LogAndThrow("Unsupported input tensor data type"); - } - } - } -} - -std::vector TfLiteExecutor::Execute() -{ - int status = 0; - std::vector results; - for (size_t x = 0; x < m_Params.m_Iterations; x++) - { - // Start timer to record inference time in milliseconds. - const auto start_time = armnn::GetTimeNow(); - // Run the inference - status = m_TfLiteInterpreter->Invoke(); - const auto duration = armnn::GetTimeDuration(start_time); - - if (m_Params.m_DontPrintOutputs || m_Params.m_ReuseBuffers) - { - break; - } - // Print out the output - for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) - { - auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; - TfLiteIntArray* outputDims = m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. - FILE* outputTensorFile = stdout; - if (!m_Params.m_OutputTensorFiles.empty()) - { - outputTensorFile = fopen(m_Params.m_OutputTensorFiles[outputIndex].c_str(), "w"); - if (outputTensorFile == NULL) - { - LogAndThrow("Specified output tensor file, \"" + m_Params.m_OutputTensorFiles[outputIndex] + - "\", cannot be created. Defaulting to stdout. 
Error was: " + std::strerror(errno)); - } - else - { - ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" - << m_Params.m_OutputTensorFiles[outputIndex] << "'"; - } - } - long outputSize = 1; - for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) - { - outputSize *= outputDims->data[dim]; - } - - std::cout << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name << ": "; - results.push_back(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation); - - switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) - { - - case kTfLiteFloat32: - { - auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); - } - break; - } - case kTfLiteInt32: - { - auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - break; - } - case kTfLiteUInt8: - { - auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); - } - break; - } - case kTfLiteInt8: - { - auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - break; - } - default: - { - LogAndThrow("Unsupported output type"); - } - } - - std::cout << std::endl; - } - CheckInferenceTimeThreshold(duration, m_Params.m_ThresholdTime); - } - - std::cout << status; - return results; -} - -void TfLiteExecutor::CompareAndPrintResult(std::vector otherOutput) -{ - for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) - { - auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; - float result = 0; - switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) - { - case kTfLiteFloat32: - { - result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, - otherOutput[outputIndex], - m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); - - break; - } - case kTfLiteInt32: - { - result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, - otherOutput[outputIndex], - m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); - break; - } - case kTfLiteUInt8: - { - result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, - otherOutput[outputIndex], - m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); - break; - } - case kTfLiteInt8: - { - result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, - otherOutput[outputIndex], - m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); - break; - } - default: - { - } - } - - std::cout << "RMSE of " - << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name - << ": " << result << std::endl; - } -}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.hpp b/tests/ExecuteNetwork/TfliteExecutor.hpp deleted file mode 100644 index 009c79488e..0000000000 --- a/tests/ExecuteNetwork/TfliteExecutor.hpp +++ /dev/null @@ -1,35 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include "IExecutor.hpp" -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include "ExecuteNetworkProgramOptions.hpp" -#include "armnn/utility/NumericCast.hpp" -#include "armnn/utility/Timer.hpp" - -#include -#include - -#include -#include -#include - -using namespace tflite; -class TfLiteExecutor : public IExecutor -{ -public: - TfLiteExecutor(const ExecuteNetworkParams& m_Params); - - std::vector Execute() override; - void PrintNetworkInfo() override{}; - void CompareAndPrintResult(std::vector otherOutput) override; - -private: - std::unique_ptr m_Model; - const ExecuteNetworkParams& m_Params; - std::unique_ptr m_TfLiteInterpreter; -}; - diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp index 268f60301c..93716e1a6f 100644 --- a/tests/InferenceModel.hpp +++ b/tests/InferenceModel.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -15,7 +15,6 @@ #include #include -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include @@ -47,6 +46,40 @@ #include #include +namespace +{ + +inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, + armnn::Optional invalidBackendIds = armnn::EmptyOptional()) +{ + if (backendIds.empty()) + { + return false; + } + + armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); + + bool allValid = true; + for (const auto& backendId : backendIds) + { + if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) + { + allValid = false; + if (invalidBackendIds) + { + if (!invalidBackendIds.value().empty()) + { + invalidBackendIds.value() += ", "; + } + invalidBackendIds.value() += backendId; + } + } + } + return allValid; +} + +} // anonymous namespace + namespace InferenceModelInternal { using BindingPointInfo = armnn::BindingPointInfo; diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp index 2d3567bd24..6c74aaa6ed 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp @@ -1,12 +1,110 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #include "NetworkExecutionUtils.hpp" #include -#include +#include +#include + +#if defined(ARMNN_SERIALIZER) +#include "armnnDeserializer/IDeserializer.hpp" +#endif +#if defined(ARMNN_TF_LITE_PARSER) +#include "armnnTfLiteParser/ITfLiteParser.hpp" +#endif +#if defined(ARMNN_ONNX_PARSER) +#include "armnnOnnxParser/IOnnxParser.hpp" +#endif + +template +auto ParseDataArray(std::istream& stream); + +template +auto ParseDataArray(std::istream& stream, + const float& quantizationScale, + const int32_t& quantizationOffset); + +template<> +auto ParseDataArray(std::istream& stream) +{ + return ParseArrayImpl(stream, [](const std::string& s) { return std::stof(s); }); +} + +template<> +auto ParseDataArray(std::istream& stream) +{ + return ParseArrayImpl(stream, [](const std::string& s) { return std::stoi(s); }); +} + +template<> +auto ParseDataArray(std::istream& stream) +{ + return ParseArrayImpl(stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + +template<> +auto ParseDataArray(std::istream& stream) +{ + return ParseArrayImpl(stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + + +template<> +auto ParseDataArray(std::istream& stream) +{ + return ParseArrayImpl(stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + +template<> +auto ParseDataArray(std::istream& stream, + const float& quantizationScale, + const int32_t& quantizationOffset) +{ + return ParseArrayImpl(stream, + [&quantizationScale, &quantizationOffset](const std::string& s) + { + return armnn::numeric_cast( + armnn::Quantize(std::stof(s), + quantizationScale, + quantizationOffset)); + }); +} + +template<> +auto ParseDataArray(std::istream& stream, + const float& quantizationScale, + const int32_t& quantizationOffset) +{ + return ParseArrayImpl(stream, + [&quantizationScale, &quantizationOffset](const std::string& s) + { + return armnn::numeric_cast( + armnn::Quantize(std::stof(s), + quantizationScale, + quantizationOffset)); + }); +} + +template> +std::vector GenerateDummyTensorData(unsigned int numElements) +{ + return std::vector(numElements, static_cast(0)); +} + + +std::vector ParseArray(std::istream& stream) +{ + return ParseArrayImpl( + stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + std::vector ParseStringList(const std::string& inputString, const char* delimiter) { std::stringstream stream(inputString); @@ -14,27 +112,189 @@ std::vector ParseStringList(const std::string& inputString, const c return armnn::stringUtils::StringTrimCopy(s); }, delimiter); } -bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, - const double& thresholdTime) + +TensorPrinter::TensorPrinter(const std::string& binding, + const armnn::TensorInfo& info, + const std::string& outputTensorFile, + bool dequantizeOutput, + const bool printToConsole) + : m_OutputBinding(binding) + , m_Scale(info.GetQuantizationScale()) + , m_Offset(info.GetQuantizationOffset()) + , m_OutputTensorFile(outputTensorFile) + , m_DequantizeOutput(dequantizeOutput) + , m_PrintToConsole(printToConsole) {} + +void TensorPrinter::operator()(const std::vector& values) +{ + if (m_PrintToConsole) + { + std::cout << m_OutputBinding << ": "; + ForEachValue(values, [](float value) + { + printf("%f ", value); + }); + printf("\n"); + } + WriteToFile(values); +} + +void TensorPrinter::operator()(const std::vector& values) { - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << 
std::fixed << duration.count() << " ms\n"; - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (thresholdTime != 0.0) + if(m_DequantizeOutput) { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << thresholdTime << " ms"; - auto thresholdMinusInference = thresholdTime - duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - if (thresholdMinusInference < 0) + auto& scale = m_Scale; + auto& offset = m_Offset; + std::vector dequantizedValues; + ForEachValue(values, [&scale, &offset, &dequantizedValues](uint8_t value) + { + auto dequantizedValue = armnn::Dequantize(value, scale, offset); + dequantizedValues.push_back(dequantizedValue); + }); + + if (m_PrintToConsole) { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - return false; + std::cout << m_OutputBinding << ": "; + ForEachValue(dequantizedValues, [](float value) + { + printf("%f ", value); + }); + printf("\n"); } + + WriteToFile(dequantizedValues); } - return true; + else + { + const std::vector intValues(values.begin(), values.end()); + operator()(intValues); + } +} + +void TensorPrinter::operator()(const std::vector& values) +{ + if (m_PrintToConsole) + { + std::cout << m_OutputBinding << ": "; + ForEachValue(values, [](int8_t value) + { + printf("%d ", value); + }); + printf("\n"); + } + WriteToFile(values); +} + +void TensorPrinter::operator()(const std::vector& values) +{ + if (m_PrintToConsole) + { + std::cout << m_OutputBinding << ": "; + ForEachValue(values, [](int value) + { + printf("%d ", value); + }); + printf("\n"); + } + WriteToFile(values); +} + +template +void TensorPrinter::ForEachValue(const Container& c, Delegate delegate) +{ + for (const auto& value : c) + { + delegate(value); + } +} + +template +void TensorPrinter::WriteToFile(const std::vector& values) +{ + if (!m_OutputTensorFile.empty()) + { + std::ofstream outputTensorFile; + outputTensorFile.open(m_OutputTensorFile, std::ofstream::out | std::ofstream::trunc); + if (outputTensorFile.is_open()) + { + outputTensorFile << m_OutputBinding << ": "; + std::copy(values.begin(), values.end(), std::ostream_iterator(outputTensorFile, " ")); + } + else + { + ARMNN_LOG(info) << "Output Tensor File: " << m_OutputTensorFile << " could not be opened!"; + } + outputTensorFile.close(); + } +} + +void PopulateTensorWithData(armnnUtils::TContainer& tensorData, + unsigned int numElements, + const std::string& dataTypeStr, + const armnn::Optional& qParams, + const armnn::Optional& dataFile) +{ + const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); + const bool quantizeData = qParams.has_value(); + + std::ifstream inputTensorFile; + if (readFromFile) + { + inputTensorFile = std::ifstream(dataFile.value()); + } + + if (dataTypeStr.compare("float") == 0) + { + if (quantizeData) + { + const float qScale = qParams.value().first; + const int qOffset = qParams.value().second; + + tensorData = readFromFile ? + ParseDataArray(inputTensorFile, qScale, qOffset) : + GenerateDummyTensorData(numElements); + } + else + { + tensorData = readFromFile ? + ParseDataArray(inputTensorFile) : + GenerateDummyTensorData(numElements); + } + } + else if (dataTypeStr.compare("int") == 0) + { + tensorData = readFromFile ? 
+ ParseDataArray(inputTensorFile) : + GenerateDummyTensorData(numElements); + } + else if (dataTypeStr.compare("qsymms8") == 0) + { + tensorData = readFromFile ? + ParseDataArray(inputTensorFile) : + GenerateDummyTensorData(numElements); + } + else if (dataTypeStr.compare("qasymm8") == 0 || dataTypeStr.compare("qasymmu8") == 0) + { + tensorData = readFromFile ? + ParseDataArray(inputTensorFile) : + GenerateDummyTensorData(numElements); + } + else if (dataTypeStr.compare("qasymms8") == 0) + { + tensorData = readFromFile ? + ParseDataArray(inputTensorFile) : + GenerateDummyTensorData(numElements); + } + else + { + std::string errorMessage = "Unsupported tensor data type " + dataTypeStr; + ARMNN_LOG(fatal) << errorMessage; + + inputTensorFile.close(); + throw armnn::Exception(errorMessage); + } + + inputTensorFile.close(); } bool ValidatePath(const std::string& file, const bool expectFile) @@ -52,13 +312,6 @@ bool ValidatePath(const std::string& file, const bool expectFile) return true; } -std::vector ParseArray(std::istream& stream) -{ - return ParseArrayImpl( - stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - bool ValidatePaths(const std::vector& fileVec, const bool expectFile) { bool allPathsValid = true; @@ -72,9 +325,5 @@ bool ValidatePaths(const std::vector& fileVec, const bool expectFil return allPathsValid; } -void LogAndThrow(std::string eMsg) -{ - ARMNN_LOG(error) << eMsg; - throw armnn::Exception(eMsg); -} + diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp index 14d7fe5551..bc2868ab35 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp @@ -1,83 +1,63 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once +#include +#include #include #include -#include -#include +#include #include #include -#include -#include -/** - * Given a measured duration and a threshold time tell the user whether we succeeded or not. - * - * @param duration the measured inference duration. - * @param thresholdTime the threshold time in milliseconds. - * @return false if the measured time exceeded the threshold. 
- */ -bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, - const double& thresholdTime); - -inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, - armnn::Optional invalidBackendIds = armnn::EmptyOptional()) -{ - if (backendIds.empty()) - { - return false; - } - - armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); - - bool allValid = true; - for (const auto& backendId : backendIds) - { - if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) - { - allValid = false; - if (invalidBackendIds) - { - if (!invalidBackendIds.value().empty()) - { - invalidBackendIds.value() += ", "; - } - invalidBackendIds.value() += backendId; - } - } - } - return allValid; -} std::vector ParseArray(std::istream& stream); /// Splits a given string at every accurance of delimiter into a vector of string std::vector ParseStringList(const std::string& inputString, const char* delimiter); -/// Dequantize an array of a given type -/// @param array Type erased array to dequantize -/// @param numElements Elements in the array -/// @param array Type erased array to dequantize -template -std::vector DequantizeArray(const void* array, unsigned int numElements, float scale, int32_t offset) +struct TensorPrinter { - const T* quantizedArray = reinterpret_cast(array); - std::vector dequantizedVector; - dequantizedVector.reserve(numElements); - for (unsigned int i = 0; i < numElements; ++i) - { - float f = armnn::Dequantize(*(quantizedArray + i), scale, offset); - dequantizedVector.push_back(f); - } - return dequantizedVector; -} + TensorPrinter(const std::string& binding, + const armnn::TensorInfo& info, + const std::string& outputTensorFile, + bool dequantizeOutput, + bool printToConsole = true); + + void operator()(const std::vector& values); + + void operator()(const std::vector& values); + + void operator()(const std::vector& values); + + void operator()(const std::vector& values); + +private: + template + void ForEachValue(const Container& c, Delegate delegate); + + template + void WriteToFile(const std::vector& values); + + std::string m_OutputBinding; + float m_Scale; + int m_Offset; + std::string m_OutputTensorFile; + bool m_DequantizeOutput; + bool m_PrintToConsole; +}; + +using QuantizationParams = std::pair; -void LogAndThrow(std::string eMsg); +void PopulateTensorWithData(armnnUtils::TContainer& tensorData, + unsigned int numElements, + const std::string& dataTypeStr, + const armnn::Optional& qParams, + const armnn::Optional& dataFile); /** * Verifies if the given string is a valid path. Reports invalid paths to std::err. 
@@ -95,152 +75,6 @@ bool ValidatePath(const std::string& file, const bool expectFile); * */ bool ValidatePaths(const std::vector& fileVec, const bool expectFile); -/// Returns a function of read the given type as a string -template ::value>* = nullptr> -std::function GetParseElementFunc() -{ - return [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }; -} - -template ::value>* = nullptr> -std::function GetParseElementFunc() -{ - return [](const std::string& s) { return std::stof(s); }; -} - -template -void PopulateTensorWithData(T* tensor, - const unsigned int numElements, - const armnn::Optional& dataFile, - const std::string& inputName) -{ - const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); - - std::ifstream inputTensorFile; - if (!readFromFile) - { - std::fill(tensor, tensor + numElements, 0); - return; - } - else - { - inputTensorFile = std::ifstream(dataFile.value()); - } - - auto parseElementFunc = GetParseElementFunc(); - std::string line; - unsigned int index = 0; - while (std::getline(inputTensorFile, line)) - { - std::vector tokens = armnn::stringUtils::StringTokenizer(line, "\t ,:"); - for (const std::string& token : tokens) - { - if (!token.empty()) // See https://stackoverflow.com/questions/10437406/ - { - try - { - if (index == numElements) - { - ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << dataFile.value() - << "\" does not match number of elements: " << numElements - << " for input \"" << inputName << "\"."; - } - *(tensor + index) = parseElementFunc(token); - index++; - } - catch (const std::exception&) - { - ARMNN_LOG(error) << "'" << token << "' is not a valid number. It has been ignored."; - } - } - } - } - - if (index != numElements) - { - ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << inputName - << "\" does not match number of elements: " << numElements - << " for input \"" << inputName << "\"."; - } -} - -template -void WriteToFile(const std::string& outputTensorFileName, - const std::string& outputName, - const T* const array, - const unsigned int numElements) -{ - std::ofstream outputTensorFile; - outputTensorFile.open(outputTensorFileName, std::ofstream::out | std::ofstream::trunc); - if (outputTensorFile.is_open()) - { - outputTensorFile << outputName << ": "; - std::copy(array, array + numElements, std::ostream_iterator(outputTensorFile, " ")); - } - else - { - ARMNN_LOG(info) << "Output Tensor File: " << outputTensorFileName << " could not be opened!"; - } - outputTensorFile.close(); -} - -struct OutputWriteInfo -{ - const armnn::Optional& m_OutputTensorFile; - const std::string& m_OutputName; - const armnn::Tensor& m_Tensor; - const bool m_PrintTensor; -}; - -template -void PrintTensor(OutputWriteInfo& info, const char* formatString) -{ - const T* array = reinterpret_cast(info.m_Tensor.GetMemoryArea()); - - if (info.m_OutputTensorFile.has_value()) - { - WriteToFile(info.m_OutputTensorFile.value(), - info.m_OutputName, - array, - info.m_Tensor.GetNumElements()); - } - - if (info.m_PrintTensor) - { - for (unsigned int i = 0; i < info.m_Tensor.GetNumElements(); i++) - { - printf(formatString, array[i]); - } - } -} - -template -void PrintQuantizedTensor(OutputWriteInfo& info) -{ - std::vector dequantizedValues; - auto tensor = info.m_Tensor; - dequantizedValues = DequantizeArray(tensor.GetMemoryArea(), - tensor.GetNumElements(), - tensor.GetInfo().GetQuantizationScale(), - tensor.GetInfo().GetQuantizationOffset()); - - if 
(info.m_OutputTensorFile.has_value()) - { - WriteToFile(info.m_OutputTensorFile.value(), - info.m_OutputName, - dequantizedValues.data(), - tensor.GetNumElements()); - } - - if (info.m_PrintTensor) - { - std::for_each(dequantizedValues.begin(), dequantizedValues.end(), [&](float value) - { - printf("%f ", value); - }); - } -} - template std::vector ParseArrayImpl(std::istream& stream, TParseElementFunc parseElementFunc, const char* chars = "\t ,:") { @@ -269,28 +103,21 @@ std::vector ParseArrayImpl(std::istream& stream, TParseElementFunc parseEleme return result; } -/// Compute the root-mean-square error (RMSE) -/// @param expected -/// @param actual -/// @param size size of the tensor -/// @return float the RMSE -template -float ComputeRMSE(const void* expected, const void* actual, const size_t size) +template +void PopulateTensorWithDataGeneric(std::vector& tensorData, + unsigned int numElements, + const armnn::Optional& dataFile, + TParseElementFunc parseFunction) { - auto typedExpected = reinterpret_cast(expected); - auto typedActual = reinterpret_cast(actual); - - T errorSum = 0; + const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); - for (unsigned int i = 0; i < size; i++) + std::ifstream inputTensorFile; + if (readFromFile) { - if (std::abs(typedExpected[i] - typedActual[i]) != 0) - { - std::cout << ""; - } - errorSum += std::pow(std::abs(typedExpected[i] - typedActual[i]), 2); + inputTensorFile = std::ifstream(dataFile.value()); } - float rmse = std::sqrt(armnn::numeric_cast(errorSum) / armnn::numeric_cast(size / sizeof(T))); - return rmse; -} \ No newline at end of file + tensorData = readFromFile ? + ParseArrayImpl(inputTensorFile, parseFunction) : + std::vector(numElements, static_cast(0)); +} -- cgit v1.2.1
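
[Editor's sketch, not part of the patch above] The utilities restored at the end of this revert (ParseArrayImpl, PopulateTensorWithDataGeneric, ParseDataArray) all follow one pattern: read a stream line by line, split each line on the delimiter set "\t ,:", convert every token with a caller-supplied parse function, and skip tokens that fail to parse. The standalone C++ sketch below mirrors that pattern only; the name ParseElements and the main() driver are illustrative and do not exist in Arm NN or in this patch.

// Minimal standalone sketch of the tokenizing-parser pattern restored in
// NetworkExecutionUtils (ParseArrayImpl / PopulateTensorWithDataGeneric).
// ParseElements is a hypothetical name; the real helpers take extra
// parameters (delimiter string, quantization info) not modelled here.
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Read a stream line by line, treat the ExecuteNetwork delimiters "\t ,:"
// as spaces, and convert each token with the supplied parse function.
template <typename T, typename TParseFunc>
std::vector<T> ParseElements(std::istream& stream, TParseFunc parseElement)
{
    std::vector<T> result;
    std::string line;
    while (std::getline(stream, line))
    {
        for (char& c : line)
        {
            if (c == ',' || c == ':' || c == '\t') { c = ' '; }
        }
        std::istringstream tokens(line);
        std::string token;
        while (tokens >> token)
        {
            try
            {
                result.push_back(parseElement(token));
            }
            catch (const std::exception&)
            {
                // Mirrors the restored behaviour: report and ignore bad tokens.
                std::cerr << "'" << token << "' is not a valid number. Ignored.\n";
            }
        }
    }
    return result;
}

int main()
{
    // Example: parse an --input-tensor-shape style string such as "1,3,224,224".
    std::istringstream shape("1,3,224,224");
    auto dims = ParseElements<unsigned int>(shape,
        [](const std::string& s) { return static_cast<unsigned int>(std::stoi(s)); });

    for (unsigned int d : dims) { std::cout << d << ' '; }
    std::cout << '\n'; // prints: 1 3 224 224
    return 0;
}

Keeping the element conversion as a template parameter is what lets the restored header reuse one helper for unsigned-int shape strings, float tensor-data files and the quantized integer types, as the specialisations of ParseDataArray in NetworkExecutionUtils.cpp show.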