From fbd2817039db8f856d75cf9d3d3980baeaa554c9 Mon Sep 17 00:00:00 2001 From: Teresa Charlin Date: Thu, 7 Jul 2022 14:24:59 +0100 Subject: Revert "Revert "IVGCVSW-6650 Refactor ExecuteNetwork"" This reverts commit 1a7f033768acb27da11503bd29abb468d2e77f9e. List of fixes to be able to add this code again: * "emplacing_back" the vector inputTensors into the vector m_InputTensorsVec outside the for loop * GetIOInfo() uses IOptimizedNetwork instead of INetwork, where the infered shapes are not saved * Add missing data type Signed32 to SetupInputsAndOutputs() * PrintOutputTensors() prints the actual output without dequantizing * Add profilingDetailsMethod as input in networkProperties in ArmNNExecutor constructor * Fix typos Change-Id: I91de166f87228282db3efa27431fe91458834442 Signed-off-by: Teresa Charlin Change-Id: Ic6634d48892d11e5f146cdf285e1e333e93e9937 Signed-off-by: Francis Murtagh --- include/armnn/INetwork.hpp | 4 +- include/armnn/TypesUtils.hpp | 24 +- src/armnn/Network.cpp | 13 + src/armnn/OptimizedNetworkImpl.hpp | 3 + tests/CMakeLists.txt | 10 + tests/ExecuteNetwork/ArmNNExecutor.cpp | 805 +++++++++++++++ tests/ExecuteNetwork/ArmNNExecutor.hpp | 161 +++ tests/ExecuteNetwork/ExecuteNetwork.cpp | 1076 +------------------- tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 134 +-- tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 90 +- .../ExecuteNetworkProgramOptions.cpp | 165 +-- tests/ExecuteNetwork/IExecutor.hpp | 22 + tests/ExecuteNetwork/TfliteExecutor.cpp | 251 +++++ tests/ExecuteNetwork/TfliteExecutor.hpp | 35 + tests/InferenceModel.hpp | 37 +- .../NetworkExecutionUtils.cpp | 309 +----- .../NetworkExecutionUtils.hpp | 279 ++++- 17 files changed, 1774 insertions(+), 1644 deletions(-) create mode 100644 tests/ExecuteNetwork/ArmNNExecutor.cpp create mode 100644 tests/ExecuteNetwork/ArmNNExecutor.hpp create mode 100644 tests/ExecuteNetwork/IExecutor.hpp create mode 100644 tests/ExecuteNetwork/TfliteExecutor.cpp create mode 100644 tests/ExecuteNetwork/TfliteExecutor.hpp diff --git a/include/armnn/INetwork.hpp b/include/armnn/INetwork.hpp index 349c7e87b5..ecc888d9c1 100644 --- a/include/armnn/INetwork.hpp +++ b/include/armnn/INetwork.hpp @@ -801,8 +801,10 @@ public: size_t GetNumInputs() const; size_t GetNumOutputs() const; + void ExecuteStrategy(IStrategy& strategy) const; + // Creates a copy of the IOptimizedNetwork. The IOptimizedNetwork will not be reoptimized, - // the provided ModelOptions will only be used when creating a LoadedNetwork. + // the provided ModelOptions will only be used when creating a LoadedNetwork. 
IOptimizedNetwork(const IOptimizedNetwork& other, const ModelOptions& modelOptions); IOptimizedNetwork(std::unique_ptr graph); IOptimizedNetwork(std::unique_ptr impl); diff --git a/include/armnn/TypesUtils.hpp b/include/armnn/TypesUtils.hpp index ccb0280457..ca7e7c58ac 100644 --- a/include/armnn/TypesUtils.hpp +++ b/include/armnn/TypesUtils.hpp @@ -301,16 +301,30 @@ inline std::ostream& operator<<(std::ostream& os, Status stat) } -inline std::ostream & operator<<(std::ostream & os, const armnn::TensorShape & shape) +inline std::ostream& operator<<(std::ostream& os, const armnn::TensorShape& shape) { os << "["; - for (uint32_t i=0; iExecuteStrategy(strategy); +} + +void OptimizedNetworkImpl::ExecuteStrategy(IStrategy &strategy) const +{ + for (auto layer : GetGraph()) + { + layer->ExecuteStrategy(strategy); + }; +} + } // namespace armnn diff --git a/src/armnn/OptimizedNetworkImpl.hpp b/src/armnn/OptimizedNetworkImpl.hpp index cb0dc4c8cf..45809d5619 100644 --- a/src/armnn/OptimizedNetworkImpl.hpp +++ b/src/armnn/OptimizedNetworkImpl.hpp @@ -25,8 +25,11 @@ public: virtual size_t GetNumOutputs() const; Graph& GetGraph() { return *m_Graph; } + Graph& GetGraph() const { return *m_Graph; } ModelOptions& GetModelOptions() { return m_ModelOptions; } + void ExecuteStrategy(IStrategy& strategy) const; + private: std::unique_ptr m_Graph; arm::pipe::ProfilingGuid m_Guid; diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 9f377c6466..9ac9bcb636 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -144,6 +144,9 @@ if (BUILD_ARMNN_SERIALIZER OR BUILD_ONNX_PARSER OR BUILD_ARMNN_TFLITE_DELEGATE) set(ExecuteNetwork_sources + ExecuteNetwork/IExecutor.hpp + ExecuteNetwork/ArmNNExecutor.cpp + ExecuteNetwork/ArmNNExecutor.hpp ExecuteNetwork/ExecuteNetwork.cpp ExecuteNetwork/ExecuteNetworkProgramOptions.cpp ExecuteNetwork/ExecuteNetworkProgramOptions.hpp @@ -152,6 +155,13 @@ if (BUILD_ARMNN_SERIALIZER NetworkExecutionUtils/NetworkExecutionUtils.cpp NetworkExecutionUtils/NetworkExecutionUtils.hpp) + if(BUILD_ARMNN_TFLITE_DELEGATE) + set(ExecuteNetwork_sources + ${ExecuteNetwork_sources} + ExecuteNetwork/TfliteExecutor.cpp + ExecuteNetwork/TfliteExecutor.hpp) + endif() + add_executable_ex(ExecuteNetwork ${ExecuteNetwork_sources}) target_include_directories(ExecuteNetwork PRIVATE ../src/armnn) target_include_directories(ExecuteNetwork PRIVATE ../src/armnnUtils) diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp new file mode 100644 index 0000000000..5be3383061 --- /dev/null +++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp @@ -0,0 +1,805 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + + +#include "ArmNNExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" + +#include +#include + + +using namespace armnn; +using namespace std::chrono; + +ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions) +: m_Params(params) +{ + runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling; + runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath; + m_Runtime = armnn::IRuntime::Create(runtimeOptions); + + auto parser = CreateParser(); + auto network = parser->CreateNetwork(m_Params); + auto optNet = OptimizeNetwork(network.get()); + + m_IOInfo = GetIOInfo(optNet.get()); + SetupInputsAndOutputs(); + + std::string errorMsg; + + armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined; + if (params.m_OutputDetailsOnlyToStdOut) + { + profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly; + } + else if (params.m_OutputDetailsToStdOut) + { + profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents; + } + + INetworkProperties networkProperties{m_Params.m_Concurrent, + MemorySource::Undefined, + MemorySource::Undefined, + params.m_EnableProfiling, + profilingDetailsMethod}; + + m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties); + + if (m_Params.m_Iterations > 1) + { + std::stringstream msg; + msg << "Network will be executed " << m_Params.m_Iterations; + if (m_Params.m_Concurrent) + { + msg << " times in an asynchronous manner. "; + } + else + { + msg << " times successively. "; + } + msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " + "cover each execution."; + ARMNN_LOG(info) << msg.str(); + } + + if (m_Params.m_GenerateTensorData) + { + ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; + } + + if (m_Params.m_DontPrintOutputs) + { + ARMNN_LOG(info) << "Printing outputs to console is disabled."; + } +} + +void ArmNNExecutor::ExecuteAsync() +{ + std::vector> memHandles; + std::unique_ptr threadpool; + armnn::AsyncCallbackManager callbackManager; + std::unordered_map inferenceOutputMap; + + for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i) + { + memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId)); + } + + threadpool = std::make_unique(m_Params.m_ThreadPoolSize, + m_Runtime.get(), + memHandles); + + ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... 
\n"; + // Declare the latest and earliest inference times here to be used when calculating overall time + std::chrono::high_resolution_clock::time_point earliestStartTime = + std::chrono::high_resolution_clock::time_point::max(); + std::chrono::high_resolution_clock::time_point latestEndTime = + std::chrono::high_resolution_clock::now(); + + // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the + // LoadedNetwork with each scheduled inference having a specific priority + for (size_t i = 0; i < m_Params.m_Iterations; ++i) + { + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + std::shared_ptr cb = callbackManager.GetNewCallback(); + inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]}); + threadpool->Schedule(m_NetworkId, + m_InputTensorsVec[i], + m_OutputTensorsVec[i], + armnn::QosExecPriority::Medium, + cb); + } + + // Check the results + for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration) + { + auto cb = callbackManager.GetNotifiedCallback(); + + // Get the results + if (earliestStartTime > cb->GetStartTime()) + { + earliestStartTime = cb->GetStartTime(); + } + if (latestEndTime < cb->GetEndTime()) + { + latestEndTime = cb->GetEndTime(); + } + + auto startTime = time_point_cast(cb->GetStartTime()); + auto endTime = time_point_cast(cb->GetEndTime()); + auto inferenceDuration = endTime - startTime; + CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); + if(!m_Params.m_DontPrintOutputs) + { + const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()]; + PrintOutputTensors(out, iteration); + } + } + + // Print duration difference between overallStartTime and overallEndTime + auto overallEndTime = time_point_cast(latestEndTime); + auto overallStartTime = time_point_cast(earliestStartTime); + auto totalInferenceDuration = overallEndTime - overallStartTime; + ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2) + << std::fixed << totalInferenceDuration.count() << " ms\n"; + +} + +void ArmNNExecutor::ExecuteSync() +{ + for (size_t x = 0; x < m_Params.m_Iterations; x++) + { + std::shared_ptr profiler = m_Runtime->GetProfiler(m_NetworkId); + + const auto start_time = armnn::GetTimeNow(); + armnn::Status ret; + if (m_Params.m_ImportInputsIfAligned) + { + ret = m_Runtime->EnqueueWorkload(m_NetworkId, + m_InputTensorsVec[x], + m_OutputTensorsVec[x], + m_ImportedInputIds[x], + m_ImportedOutputIds[x]); + } + else + { + ret = m_Runtime->EnqueueWorkload(m_NetworkId, + m_InputTensorsVec[x], + m_OutputTensorsVec[x]); + } + + const auto inferenceDuration = armnn::GetTimeDuration(start_time); + + // If profiling is enabled print out the results + if(profiler && profiler->IsProfilingEnabled()) + { + profiler->Print(std::cout); + } + + if(ret == armnn::Status::Failure) + { + throw armnn::Exception("IRuntime::EnqueueWorkload failed"); + } + + if(!m_Params.m_DontPrintOutputs) + { + PrintOutputTensors(&m_OutputTensorsVec[x], x); + } + + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime); + } +} + +std::vector ArmNNExecutor::Execute() +{ + if(m_Params.m_ThreadPoolSize == 0) + { + ExecuteSync(); + } + else + { + ExecuteAsync(); + } + std::vector results; + for (auto& output : m_OutputStorage) + { + results.push_back(output.m_Mem); + } + + return results; +} + +void ArmNNExecutor::PrintNetworkInfo() +{ + const std::vector& inputNames = 
m_Params.m_InputNames.size() != 0 ? + m_Params.m_InputNames : + m_IOInfo.m_InputNames; + std::stringstream ss; + ss << "===== Network Info =====\n"; + ss << "Inputs in order:\n"; + for (const auto& inputName : inputNames) + { + const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second; + ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType()); + if (inputInfo.IsQuantized()) + { + ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset(); + if (inputInfo.HasMultipleQuantizationScales()) + { + ss << " Quantization scales: "; + for (const auto scale: inputInfo.GetQuantizationScales()) + { + ss << scale << ", "; + } + } + else + { + ss << " Quantization scale: " << inputInfo.GetQuantizationScale(); + } + } + ss << "\n"; + } + + ss << "Outputs in order:\n"; + for (const auto& outputName : m_IOInfo.m_OutputNames) + { + const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second; + ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType()); + if (outputInfo.IsQuantized()) + { + ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset(); + if (outputInfo.HasMultipleQuantizationScales()) + { + ss << " Quantization scales: "; + for (const auto scale: outputInfo.GetQuantizationScales()) + { + ss << scale << ", "; + } + } + else + { + ss << " Quantization scale: " << outputInfo.GetQuantizationScale(); + } + } + ss << "\n"; + } + + std::cout << ss.str() << std::endl; +} + +void ArmNNExecutor::SetupInputsAndOutputs() +{ + const unsigned int noOfInputs = m_IOInfo.m_InputNames.size(); + + if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs) + { + LogAndThrow("Number of input names does not match number of inputs"); + } + + const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size(); + const std::vector& inputNames = m_Params.m_InputNames.size() != 0 ? + m_Params.m_InputNames : + m_IOInfo.m_InputNames; + unsigned int noInputSets = 1; + + if (inputFilePaths != 0) + { + if (inputFilePaths % noOfInputs != 0) + { + LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) + + " not compatible with number of inputs: " + std::to_string(noOfInputs)); + } + noInputSets = inputFilePaths / noOfInputs; + if (noInputSets != 1 && m_Params.m_ReuseBuffers) + { + LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers"); + } + } + + const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size(); + const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size(); + unsigned int noOutputSets = 1; + + if (outputFilePaths != 0) + { + if (outputFilePaths % noOfOutputs != 0) + { + LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) + + ", not compatible with number of outputs: " + std::to_string(noOfOutputs)); + } + noOutputSets = outputFilePaths / noOfOutputs; + + if (noOutputSets != 1 && m_Params.m_ReuseBuffers) + { + LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers"); + } + } + + if (m_Params.m_ThreadPoolSize != 0) + { + // The current implementation of the Threadpool does not allow binding of outputs to a thread + // So to ensure no two threads write to the same output at the same time, no output can be reused + noOutputSets = m_Params.m_Iterations; + } + + if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs) + { + ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required " + << "for each input. 
The user provided " + << m_Params.m_InputTensorDataFilePaths.size() + << " input-tensor-data file/s which will be used to fill the input/s.\n"; + } + + unsigned int inputCount = 0; + for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet) + { + armnn::InputTensors inputTensors; + for (const auto& inputName: inputNames) + { + armnn::BindingPointInfo bindingPointInfo; + try + { + bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName); + } + catch (const std::out_of_range& e) + { + LogAndThrow("Input with inputName: " + inputName + " not found."); + } + + const armnn::TensorInfo& tensorInfo = bindingPointInfo.second; + auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(), + tensorInfo.GetQuantizationScale(), + tensorInfo.GetQuantizationOffset(), + true}; + + m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()}); + + const int bindingId = bindingPointInfo.first; + inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem}); + + const armnn::Optional dataFile = m_Params.m_GenerateTensorData ? + armnn::EmptyOptional() : + armnn::MakeOptional( + m_Params.m_InputTensorDataFilePaths.at(inputCount++)); + + switch (tensorInfo.GetDataType()) + { + case armnn::DataType::Float32: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QSymmS16: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QSymmS8: + case armnn::DataType::QAsymmS8: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::QAsymmU8: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + case armnn::DataType::Signed32: + { + auto typedTensor = reinterpret_cast(m_InputStorage.back().m_Mem); + PopulateTensorWithData(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName); + break; + } + default: + { + LogAndThrow("Unexpected DataType"); + } + } + + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedInputIds.push_back( + m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc)); + } + } + m_InputTensorsVec.emplace_back(inputTensors); + } + + for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet) + { + armnn::OutputTensors outputTensors; + for (const auto& output: m_IOInfo.m_OutputInfoMap) + { + const armnn::BindingPointInfo& bindingPointInfo = output.second; + const armnn::TensorInfo& tensorInfo = bindingPointInfo.second; + + m_OutputStorage.emplace_back(tensorInfo.GetNumBytes()); + outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem}); + } + m_OutputTensorsVec.emplace_back(outputTensors); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedOutputIds.push_back( + m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc)); + } + } + + // Fill the remaining iterations with copies + const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets; + for (unsigned int i = 1; i <= remainingInputSets; i++) + { + 
m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]); + } + } + + const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets; + for (unsigned int i = 1; i <= remainingOutputSets; i++) + { + m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]); + if (m_Params.m_ImportInputsIfAligned) + { + m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]); + } + } +} + +ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet) +{ + struct IOStrategy : armnn::IStrategy + { + void ExecuteStrategy(const armnn::IConnectableLayer* layer, + const armnn::BaseDescriptor& descriptor, + const std::vector& constants, + const char* name, + const armnn::LayerBindingId id = 0) override + { + armnn::IgnoreUnused(descriptor, constants, id); + switch (layer->GetType()) + { + case armnn::LayerType::Input: + { + m_IOInfo.m_InputNames.emplace_back(name); + m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()}; + break; + } + case armnn::LayerType::Output: + { + m_IOInfo.m_OutputNames.emplace_back(name); + m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()}; + break; + } + default: {} + } + } + IOInfo m_IOInfo; + }; + + IOStrategy ioStrategy; + optNet->ExecuteStrategy(ioStrategy); + + return ioStrategy.m_IOInfo; +} + +armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network) +{ + armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}}; + + armnn::OptimizerOptions options; + options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode; + options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode; + options.m_Debug = m_Params.m_PrintIntermediate; + options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ? + armnn::ShapeInferenceMethod::InferAndValidate : + armnn::ShapeInferenceMethod::ValidateOnly; + options.m_ProfilingEnabled = m_Params.m_EnableProfiling; + + armnn::BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", m_Params.m_EnableFastMath }, + { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork }, + { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath }, + { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath } + }); + + armnn::BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", m_Params.m_EnableFastMath }, + { "NumberOfThreads", m_Params.m_NumberOfThreads } + }); + options.m_ModelOptions.push_back(gpuAcc); + options.m_ModelOptions.push_back(cpuAcc); + + const auto optimization_start_time = armnn::GetTimeNow(); + optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options); + + ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2) + << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n"; + + if (!optNet) + { + LogAndThrow("Optimize returned nullptr"); + } + + return optNet; +} + +std::unique_ptr ArmNNExecutor::CreateParser() +{ + // If no model format is given check the file name + const std::string& modelFormat = m_Params.m_ModelPath; + + m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos ? 
true : false; + std::unique_ptr parser = nullptr; + // Forward to implementation based on the parser type + if (modelFormat.find("armnn") != std::string::npos) + { +#if defined(ARMNN_SERIALIZER) + parser = std::make_unique(); +#else + LogAndThrow("Not built with serialization support."); +#endif + } + else if(modelFormat.find("tflite") != std::string::npos) + { +#if defined(ARMNN_TF_LITE_PARSER) + parser = std::make_unique(m_Params); +#else + LogAndThrow("Not built with Tensorflow-Lite parser support."); +#endif + } + else if (modelFormat.find("onnx") != std::string::npos) + { +#if defined(ARMNN_ONNX_PARSER) + parser = std::make_unique(); +#else + LogAndThrow("Not built with Onnx parser support."); +#endif + } + + return parser; +} + +void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors, + unsigned int iteration) +{ + auto findOutputName = [&](const armnn::LayerBindingId id) + { + for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it) + { + if (id == it->second.first) + { + return it->first; + } + } + return std::string{}; + }; + + unsigned int outputIndex = 0; + unsigned int numOutputs = outputTensors->size(); + for (const auto& output: *outputTensors) + { + const auto bindingName = findOutputName(output.first); + // We've made sure before that the number of output files either equals numOutputs, in which + // case we override those files when processing the results of each iteration (only the result + // of the last iteration will be stored), or there are enough + // output files for each output of each iteration. + size_t outputFileIndex = iteration * numOutputs + outputIndex; + if (!m_Params.m_OutputTensorFiles.empty()) + { + outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size(); + ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '" + << output.first + << "' of iteration: " << iteration + 1 << " to file: '" + << m_Params.m_OutputTensorFiles[outputFileIndex] << "'"; + } + + const armnn::Optional outputTensorFile = m_Params.m_OutputTensorFiles.empty() ? 
+ armnn::EmptyOptional() : + armnn::MakeOptional( + m_Params.m_OutputTensorFiles[outputFileIndex]); + + OutputWriteInfo outputWriteInfo + { + outputTensorFile, + bindingName, + output.second, + !m_Params.m_DontPrintOutputs + }; + + std::cout << bindingName << ": "; + std::vector values; + switch (output.second.GetDataType()) + { + case armnn::DataType::Float32: + { + PrintTensor(outputWriteInfo, "%f "); + break; + } + + case armnn::DataType::Signed32: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::QSymmS8: + case armnn::DataType::QAsymmS8: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::QAsymmU8: + { + PrintTensor(outputWriteInfo, "%d "); + break; + } + case armnn::DataType::Float16: + case armnn::DataType::QSymmS16: + case armnn::DataType::BFloat16: + case armnn::DataType::Boolean: + case armnn::DataType::Signed64: + default: + { + LogAndThrow("Unexpected DataType"); + } + } + std::cout << "\n"; + } +} + +void ArmNNExecutor::CompareAndPrintResult(std::vector otherOutput) +{ + unsigned int index = 0; + + for (const auto& outputTensors: m_OutputTensorsVec) + { + for (const auto& outputTensor: outputTensors) + { + float result = 0; + size_t size = outputTensor.second.GetNumBytes(); + + switch (outputTensor.second.GetDataType()) + { + case armnn::DataType::Float32: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QSymmS16: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QSymmS8: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + case armnn::DataType::QAsymmU8: + case armnn::DataType::QAsymmS8: + { + result = ComputeRMSE(outputTensor.second.GetMemoryArea(), otherOutput[index++], size); + break; + } + default: + { + LogAndThrow("Unexpected DataType"); + } + } + std::cout << "RMSE: of " << result << "\n"; + } + } +} +#if defined(ARMNN_SERIALIZER) +ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){} + +armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params) +{ + const std::string& modelPath = params.m_ModelPath; + + std::ifstream file(modelPath, std::ios::binary); + return m_Parser->CreateNetworkFromBinary(file); +} + +armnn::BindingPointInfo +ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName) +{ + armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName); + return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; +} + +armnn::BindingPointInfo +ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName) +{ + armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName); + return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo}; +} +#endif + +#if defined(ARMNN_TF_LITE_PARSER) +ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params) +{ + armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options; + options.m_StandInLayerForUnsupported = params.m_ParseUnsupported; + options.m_InferAndValidate = params.m_InferOutputShape; + + m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options); +} + +armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params) +{ + 
const std::string& modelPath = params.m_ModelPath; + return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str()); +} + +armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId, + const std::string& inputName) +{ + return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName); +} + +armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId, + const std::string& outputName) +{ + return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName); +} +#endif + + +#if defined(ARMNN_ONNX_PARSER) +ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){} + +armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params) +{ + const std::string& modelPath = params.m_ModelPath; + m_Parser = armnnOnnxParser::IOnnxParser::Create(); + std::map inputShapes; + if(!params.m_InputTensorShapes.empty()) + { + const size_t numInputShapes = params.m_InputTensorShapes.size(); + const size_t numInputBindings = params.m_InputNames.size(); + if(numInputShapes < numInputBindings) + { + throw armnn::Exception( + fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}", + numInputBindings, numInputShapes)); + } + + for (size_t i = 0; i < numInputShapes; i++) + { + inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i]; + } + + return params.m_IsModelBinary ? + m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) : + m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes); + } + + // Handle text and binary input differently by calling the corresponding parser function + return params.m_IsModelBinary ? + m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) : + m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str()); +} + +armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName) +{ + return m_Parser->GetNetworkInputBindingInfo(inputName); +} + +armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName) +{ + return m_Parser->GetNetworkOutputBindingInfo(outputName); +} +#endif diff --git a/tests/ExecuteNetwork/ArmNNExecutor.hpp b/tests/ExecuteNetwork/ArmNNExecutor.hpp new file mode 100644 index 0000000000..c4adc9e120 --- /dev/null +++ b/tests/ExecuteNetwork/ArmNNExecutor.hpp @@ -0,0 +1,161 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "IExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include "ExecuteNetworkProgramOptions.hpp" +#include "armnn/utility/NumericCast.hpp" +#include "armnn/utility/Timer.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#if defined(ARMNN_SERIALIZER) +#include "armnnDeserializer/IDeserializer.hpp" +#endif +#if defined(ARMNN_TF_LITE_PARSER) +#include +#endif +#if defined(ARMNN_ONNX_PARSER) +#include +#endif + +class ArmNNExecutor : public IExecutor +{ +public: + ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions); + + std::vector Execute() override; + void PrintNetworkInfo() override; + void CompareAndPrintResult(std::vector otherOutput) override; + +private: + + struct IParser; + struct IOInfo; + struct IOStorage; + + using BindingPointInfo = armnn::BindingPointInfo; + + std::unique_ptr CreateParser(); + + void ExecuteAsync(); + void ExecuteSync(); + void SetupInputsAndOutputs(); + + IOInfo GetIOInfo(armnn::IOptimizedNetwork* optNet); + + void PrintOutputTensors(const armnn::OutputTensors* outputTensors, unsigned int iteration); + + armnn::IOptimizedNetworkPtr OptimizeNetwork(armnn::INetwork* network); + + struct IOStorage + { + IOStorage(size_t size) + { + m_Mem = operator new(size); + } + ~IOStorage() + { + operator delete(m_Mem); + } + IOStorage(IOStorage&& rhs) + { + this->m_Mem = rhs.m_Mem; + rhs.m_Mem = nullptr; + } + + IOStorage(const IOStorage& rhs) = delete; + IOStorage& operator=(IOStorage& rhs) = delete; + IOStorage& operator=(IOStorage&& rhs) = delete; + + void* m_Mem; + }; + + struct IOInfo + { + std::vector m_InputNames; + std::vector m_OutputNames; + std::map m_InputInfoMap; + std::map m_OutputInfoMap; + }; + + IOInfo m_IOInfo; + std::vector m_InputStorage; + std::vector m_OutputStorage; + std::vector m_InputTensorsVec; + std::vector m_OutputTensorsVec; + std::vector> m_ImportedInputIds; + std::vector> m_ImportedOutputIds; + std::shared_ptr m_Runtime; + armnn::NetworkId m_NetworkId; + ExecuteNetworkParams m_Params; + + struct IParser + { + virtual armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) = 0; + virtual armnn::BindingPointInfo GetInputBindingPointInfo(size_t id, const std::string& inputName) = 0; + virtual armnn::BindingPointInfo GetOutputBindingPointInfo(size_t id, const std::string& outputName) = 0; + + virtual ~IParser(){}; + }; + +#if defined(ARMNN_SERIALIZER) + class ArmNNDeserializer : public IParser + { + public: + ArmNNDeserializer(); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t, const std::string& outputName) override; + + private: + armnnDeserializer::IDeserializerPtr m_Parser; + }; +#endif + +#if defined(ARMNN_TF_LITE_PARSER) + class TfliteParser : public IParser + { + public: + TfliteParser(const ExecuteNetworkParams& params); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string& outputName) override; + + private: + armnnTfLiteParser::ITfLiteParserPtr m_Parser{nullptr, [](armnnTfLiteParser::ITfLiteParser*){}}; + }; +#endif + 
+#if defined(ARMNN_ONNX_PARSER) + class OnnxParser : public IParser + { + public: + OnnxParser(); + + armnn::INetworkPtr CreateNetwork(const ExecuteNetworkParams& params) override; + armnn::BindingPointInfo GetInputBindingPointInfo(size_t subgraphId, const std::string& inputName) override; + armnn::BindingPointInfo GetOutputBindingPointInfo(size_t subgraphId, const std::string& outputName) override; + + private: + armnnOnnxParser::IOnnxParserPtr m_Parser; + }; +#endif +}; \ No newline at end of file diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index 02a21c30cf..e9ebd0db8e 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -1,1077 +1,91 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // -#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include "ExecuteNetworkProgramOptions.hpp" -#include -#include - -#include -#include -#include -#include -#include - -#if defined(ARMNN_SERIALIZER) -#include "armnnDeserializer/IDeserializer.hpp" -#endif -#if defined(ARMNN_TF_LITE_PARSER) -#include "armnnTfLiteParser/ITfLiteParser.hpp" -#endif -#if defined(ARMNN_ONNX_PARSER) -#include "armnnOnnxParser/IOnnxParser.hpp" -#endif +#include "ArmNNExecutor.hpp" #if defined(ARMNN_TFLITE_DELEGATE) -#include -#include - -#include -#include -#include -#include -#include -#include -#include +#include "TfliteExecutor.hpp" #endif +#include -#include -/** - * Given a measured duration and a threshold time tell the user whether we succeeded or not. - * - * @param duration the measured inference duration. - * @param thresholdTime the threshold time in milliseconds. - * @return false if the measured time exceeded the threshold. 
- */ -bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, - const double& thresholdTime) +std::unique_ptr BuildExecutor(ProgramOptions& programOptions) { - ARMNN_LOG(info) << "Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (thresholdTime != 0.0) + if (programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || + programOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << thresholdTime << " ms"; - auto thresholdMinusInference = thresholdTime - duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - return false; - } - } - return true; -} - #if defined(ARMNN_TFLITE_DELEGATE) -int TfLiteDelegateMainImpl(const ExecuteNetworkParams& params, const armnn::IRuntime::CreationOptions runtimeOptions) -{ - // Build model and corresponding interpreter - using namespace tflite; - - std::unique_ptr model = tflite::FlatBufferModel::BuildFromFile(params.m_ModelPath.c_str()); - - auto tfLiteInterpreter = std::make_unique(); - tflite::ops::builtin::BuiltinOpResolver resolver; - - tflite::InterpreterBuilder builder(*model, resolver); - builder(&tfLiteInterpreter); - tfLiteInterpreter->AllocateTensors(); - - int status = 0; - - // Create & populate Armnn Delegate, then register it to TfLiteInterpreter - if (params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) - { - // Create the Armnn Delegate - // Populate a DelegateOptions from the ExecuteNetworkParams. - armnnDelegate::DelegateOptions delegateOptions = params.ToDelegateOptions(); - delegateOptions.SetExternalProfilingParams( - arm::pipe::ConvertExternalProfilingOptions(runtimeOptions.m_ProfilingOptions)); - - std::unique_ptr - theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), - armnnDelegate::TfLiteArmnnDelegateDelete); - // Register armnn_delegate to TfLiteInterpreter - status = tfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); - if (status != kTfLiteOk) - { - ARMNN_LOG(fatal) << "Could not register ArmNN TfLite Delegate to TfLiteInterpreter!"; - return EXIT_FAILURE; - } - } - else - { - std::cout << "Running on TfLite without ArmNN delegate\n"; - } - - const size_t numInputs = params.m_InputNames.size(); - // Populate input tensor of interpreter - for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) - { - // Load (or generate) input data for inference - armnn::Optional dataFile = params.m_GenerateTensorData ? 
armnn::EmptyOptional() : - armnn::MakeOptional(params.m_InputTensorDataFilePaths[inputIndex]); - - int input = tfLiteInterpreter->inputs()[inputIndex]; - TfLiteIntArray* inputDims = tfLiteInterpreter->tensor(input)->dims; - - unsigned int inputSize = 1; - if (params.m_InputTensorShapes.size() > 0) - { - inputSize = params.m_InputTensorShapes[inputIndex]->GetNumElements(); - } - else - { - for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) - { - inputSize *= inputDims->data[dim]; - } - } - - if (params.m_InputTypes[inputIndex].compare("float") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stof(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qsymms8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymms8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("int") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return std::stoi(s); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else if (params.m_InputTypes[inputIndex].compare("qasymm8") == 0 || - params.m_InputTypes[inputIndex].compare("qasymmu8") == 0) - { - auto inputData = tfLiteInterpreter->typed_tensor(input); - - if(inputData == NULL) - { - ARMNN_LOG(fatal) << "Input tensor is null, input type: " - "\"" << params.m_InputTypes[inputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - std::vector tensorData; - PopulateTensorWithDataGeneric(tensorData, - inputSize, - dataFile, - [](const std::string& s) - { return armnn::numeric_cast(std::stoi(s)); }); - - std::copy(tensorData.begin(), tensorData.end(), inputData); - } - else - { - ARMNN_LOG(fatal) << "Unsupported input tensor data type \"" << params.m_InputTypes[inputIndex] << "\". "; - return EXIT_FAILURE; - } - } - - // Run inference, print the output of the inference - for (size_t x = 0; x < params.m_Iterations; x++) - { - // Start timer to record inference time in milliseconds. - const auto start_time = armnn::GetTimeNow(); - // Run the inference - status = tfLiteInterpreter->Invoke(); - const auto duration = armnn::GetTimeDuration(start_time); - - // The TFLite interpreter's outputs might be in a different order than the user inputted output names. 
- std::map paramToTfliteOutputIndex; - for (unsigned int paramIndex = 0; paramIndex < params.m_OutputNames.size(); ++paramIndex) - { - paramToTfliteOutputIndex[paramIndex] = -1; - for (unsigned int tfLiteIndex = 0; tfLiteIndex < tfLiteInterpreter->outputs().size(); ++tfLiteIndex) - { - if (params.m_OutputNames[paramIndex] == tfLiteInterpreter->GetOutputName(tfLiteIndex)) - { - paramToTfliteOutputIndex[paramIndex] = tfLiteIndex; - } - } - } - - // Print out the output - for (unsigned int paramOutputIndex = 0; paramOutputIndex < params.m_OutputNames.size(); ++paramOutputIndex) - { - int outputIndex = paramToTfliteOutputIndex[paramOutputIndex]; - if (outputIndex == -1) - { - std::cout << fmt::format("Output name: {} doesn't exist.", params.m_OutputNames[paramOutputIndex]) << - std::endl; - continue; - } - auto tfLiteDelegateOutputId = tfLiteInterpreter->outputs()[outputIndex]; - TfLiteIntArray* outputDims = tfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; - // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. - FILE* outputTensorFile = stdout; - if (!params.m_OutputTensorFiles.empty()) - { - outputTensorFile = fopen(params.m_OutputTensorFiles[outputIndex].c_str(), "w"); - if (outputTensorFile == NULL) - { - ARMNN_LOG(fatal) << "Specified output tensor file, \"" << - params.m_OutputTensorFiles[outputIndex] << - "\", cannot be created. Defaulting to stdout. " << - "Error was: " << std::strerror(errno); - outputTensorFile = stdout; - } - else - { - ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputIndex] << "'"; - } - } - long outputSize = 1; - for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) - { - outputSize *= outputDims->data[dim]; - } - - std::cout << tfLiteInterpreter->GetOutputName(outputIndex) << ": "; - if (params.m_OutputTypes[paramOutputIndex].compare("float") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("int") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if (params.m_OutputTypes[paramOutputIndex].compare("qsymms8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymms8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); - } - } - } - else if 
(params.m_OutputTypes[paramOutputIndex].compare("qasymm8") == 0 || - params.m_OutputTypes[paramOutputIndex].compare("qasymmu8") == 0) - { - auto tfLiteDelageOutputData = tfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); - if(tfLiteDelageOutputData == NULL) - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << "\" may be incorrect."; - return EXIT_FAILURE; - } - - if (!params.m_DontPrintOutputs) - { - for (int i = 0; i < outputSize; ++i) - { - fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); - } - } - } - else - { - ARMNN_LOG(fatal) << "Output tensor is null, output type: " - "\"" << params.m_OutputTypes[paramOutputIndex] << - "\" may be incorrect. Output type can be specified with -z argument"; - return EXIT_FAILURE; - } - std::cout << std::endl; - } - CheckInferenceTimeThreshold(duration, params.m_ThresholdTime); - } - - return status; -} + return std::make_unique(programOptions.m_ExNetParams); +#else + ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; + return nullptr; #endif -template -int MainImpl(const ExecuteNetworkParams& params, - const std::shared_ptr& runtime = nullptr) -{ - using namespace std::chrono; - - std::vector> inputs; - std::vector> outputs; - - try - { - // Creates an InferenceModel, which will parse the model and load it into an IRuntime. - typename InferenceModel::Params inferenceModelParams; - inferenceModelParams.m_ModelPath = params.m_ModelPath; - inferenceModelParams.m_AllowExpandedDims = params.m_AllowExpandedDims; - inferenceModelParams.m_IsModelBinary = params.m_IsModelBinary; - inferenceModelParams.m_ComputeDevices = params.m_ComputeDevices; - inferenceModelParams.m_DynamicBackendsPath = params.m_DynamicBackendsPath; - inferenceModelParams.m_PrintIntermediateLayers = params.m_PrintIntermediate; - inferenceModelParams.m_VisualizePostOptimizationModel = params.m_EnableLayerDetails; - inferenceModelParams.m_ParseUnsupported = params.m_ParseUnsupported; - inferenceModelParams.m_InferOutputShape = params.m_InferOutputShape; - inferenceModelParams.m_EnableFastMath = params.m_EnableFastMath; - inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork; - inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath; - inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads; - inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath; - inferenceModelParams.m_AsyncEnabled = params.m_Concurrent; - inferenceModelParams.m_ThreadPoolSize = params.m_ThreadPoolSize; - inferenceModelParams.m_OutputDetailsToStdOut = params.m_OutputDetailsToStdOut; - inferenceModelParams.m_OutputDetailsOnlyToStdOut = params.m_OutputDetailsOnlyToStdOut; - inferenceModelParams.m_ImportInputsIfAligned = params.m_ImportInputsIfAligned; - - for(const std::string& inputName: params.m_InputNames) - { - inferenceModelParams.m_InputBindings.push_back(inputName); - } - - for(unsigned int i = 0; i < params.m_InputTensorShapes.size(); ++i) - { - inferenceModelParams.m_InputShapes.push_back(*params.m_InputTensorShapes[i]); - } - - for(const std::string& outputName: params.m_OutputNames) - { - inferenceModelParams.m_OutputBindings.push_back(outputName); - } - - inferenceModelParams.m_SubgraphId = params.m_SubgraphId; - inferenceModelParams.m_EnableFp16TurboMode = params.m_EnableFp16TurboMode; - inferenceModelParams.m_EnableBf16TurboMode = params.m_EnableBf16TurboMode; - - InferenceModel model(inferenceModelParams, - params.m_EnableProfiling, - 
params.m_DynamicBackendsPath, - runtime); - - const size_t numInputs = inferenceModelParams.m_InputBindings.size(); - - armnn::Optional qParams = params.m_QuantizeInput ? - armnn::MakeOptional( - model.GetInputQuantizationParams()) : - armnn::EmptyOptional(); - - if (params.m_InputTensorDataFilePaths.size() > numInputs) - { - ARMNN_LOG(info) << "Given network has " << numInputs << " input/s. One input-tensor-data file is required " - << "for each input. The user provided " - << params.m_InputTensorDataFilePaths.size() - << " input-tensor-data file/s which will be used to fill the input/s.\n"; - } - - const size_t numOutputs = inferenceModelParams.m_OutputBindings.size(); - - // The user is allowed to specify the data type of each output tensor. It is used here to construct the - // result tensors for each iteration. It is possible for the user to specify a type that does not match - // the data type of the corresponding model output. It may not make sense, but it is historically allowed. - // The potential problem here is a buffer overrun when a larger data type is written into the space for a - // smaller one. Issue a warning to highlight the potential problem. - for (unsigned int outputIdx = 0; outputIdx < model.GetOutputBindingInfos().size(); ++outputIdx) - { - armnn::DataType type = model.GetOutputBindingInfo(outputIdx).second.GetDataType(); - switch (type) - { - // --output-type only supports float, int, qasymms8 or qasymmu8. - case armnn::DataType::Float32: - if (params.m_OutputTypes[outputIdx].compare("float") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Float32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmU8: - if (params.m_OutputTypes[outputIdx].compare("qasymmu8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmU8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::Signed32: - if (params.m_OutputTypes[outputIdx].compare("int") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type Signed32. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - case armnn::DataType::QAsymmS8: - if (params.m_OutputTypes[outputIdx].compare("qasymms8") != 0) - { - ARMNN_LOG(warning) << "Model output index: " << outputIdx << " has data type QAsymmS8. The " - << "corresponding --output-type is " << params.m_OutputTypes[outputIdx] << - ". This may cause unexpected problems or random failures."; - } - break; - default: - break; - } - } - - if (!params.m_ReuseBuffers) - { - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector inputDataContainers; - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = j * numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? 
- armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - inputDataContainers.push_back(tensorData); - } - inputs.push_back(inputDataContainers); - } - - for (unsigned int j = 0; j < params.m_Iterations; ++j) - { - std::vector outputDataContainers; - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("int") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - outputDataContainers.push_back(std::vector(model.GetOutputSize(i))); - } - else - { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - outputs.push_back(outputDataContainers); - } - } - if (params.m_Iterations > 1) - { - std::stringstream msg; - msg << "Network will be executed " << params.m_Iterations; - if (params.m_Concurrent) - { - msg << " times in an asynchronous manner. "; - } - else - { - msg << " times successively. "; - } - msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to " - "cover each execution."; - ARMNN_LOG(info) << msg.str(); - } - - // Synchronous execution - if (!params.m_Concurrent && !params.m_ReuseBuffers) - { - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(inputs[x], outputs[x]); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo& infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = x * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << x+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[x][i]); - } - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference_duration.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference_duration.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - // Synchronous Execution using a single buffer for input and output data - else if(!params.m_Concurrent) - { - std::vector input; - std::vector output; - - for (unsigned int i = 0; i < numInputs; ++i) - { - // If there are fewer input files given than required for the execution of - // params.m_Iterations we simply start with the first input file again - size_t inputFileIndex = numInputs + i; - if (!params.m_InputTensorDataFilePaths.empty()) - { - inputFileIndex = inputFileIndex % params.m_InputTensorDataFilePaths.size(); - } - - armnn::Optional dataFile = params.m_GenerateTensorData ? - armnn::EmptyOptional() : - armnn::MakeOptional( - params.m_InputTensorDataFilePaths.at( - inputFileIndex)); - - unsigned int numElements = model.GetInputSize(i); - if (params.m_InputTensorShapes.size() > i && params.m_InputTensorShapes[i]) - { - // If the user has provided a tensor shape for the current input, - // override numElements - numElements = params.m_InputTensorShapes[i]->GetNumElements(); - } - - armnnUtils::TContainer tensorData; - PopulateTensorWithData(tensorData, - numElements, - params.m_InputTypes[i], - qParams, - dataFile); - - input.push_back(tensorData); - } - - for (unsigned int i = 0; i < numOutputs; ++i) - { - if (params.m_OutputTypes[i].compare("float") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("int") == 0) { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymm8") == 0 || - params.m_OutputTypes[i].compare("qasymmu8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else if (params.m_OutputTypes[i].compare("qasymms8") == 0) - { - output.push_back(std::vector(model.GetOutputSize(i))); - } else { - ARMNN_LOG(fatal) << "Unsupported tensor data type \"" << params.m_OutputTypes[i] << "\". "; - return EXIT_FAILURE; - } - } - - std::vector> timings; - timings.reserve(params.m_Iterations); - for (size_t x = 0; x < params.m_Iterations; x++) - { - // model.Run returns the inference time elapsed in EnqueueWorkload (in milliseconds) - auto inference_duration = model.Run(input, output); - timings.push_back(inference_duration); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output. 
This only needs to happen once as input is the same for each iteration. - const auto &infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - const armnn::TensorInfo &infoOut = infosOut[i].second; - - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the result - // of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] <<" to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? "" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, output[i]); - } - - for(auto inference: timings) - { - - ARMNN_LOG(info) << "\nInference time: " << std::setprecision(2) - << std::fixed << inference.count() << " ms\n"; - - // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line - if (params.m_ThresholdTime != 0.0) - { - ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) - << std::fixed << params.m_ThresholdTime << " ms"; - auto thresholdMinusInference = params.m_ThresholdTime - inference.count(); - ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) - << std::fixed << thresholdMinusInference << " ms" << "\n"; - - if (thresholdMinusInference < 0) - { - std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; - ARMNN_LOG(fatal) << errorMessage; - } - } - } - } - - // Asynchronous execution using the Arm NN thread pool - else if (params.m_ThreadPoolSize >= 1) - { - try - { - ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... 
\n"; - armnn::AsyncCallbackManager callbackManager; - std::unordered_map&> inferenceOutputMap; - - // Declare the latest and earliest inference times here to be used when calculating overall time - std::chrono::high_resolution_clock::time_point earliestStartTime; - std::chrono::high_resolution_clock::time_point latestEndTime = - std::chrono::high_resolution_clock::now(); - - // For the asynchronous execution, we are adding a pool of working memory handles (1 per thread) in the - // LoadedNetwork with each scheduled inference having a specific priority - for (size_t i = 0; i < params.m_Iterations; ++i) - { - std::shared_ptr cb = callbackManager.GetNewCallback(); - inferenceOutputMap.insert({cb->GetInferenceId(), outputs[i]}); - model.RunAsync(inputs[i], outputs[i], cb); - } - - // Check the results - unsigned int j = 0; - for (size_t iteration = 0; iteration < params.m_Iterations; ++iteration) - { - auto cb = callbackManager.GetNotifiedCallback(); - - // Get the results - auto endTime = time_point_cast(cb->GetEndTime()); - auto startTime = time_point_cast(cb->GetStartTime()); - auto inferenceDuration = endTime - startTime; - - if (latestEndTime < cb->GetEndTime()) - { - latestEndTime = cb->GetEndTime(); - } - - if (earliestStartTime.time_since_epoch().count() == 0) - { - earliestStartTime = cb->GetStartTime(); - } - else if (earliestStartTime > cb->GetStartTime()) - { - earliestStartTime = cb->GetStartTime(); - } - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = iteration * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << iteration+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, inferenceOutputMap.at(cb->GetInferenceId())[i]); - } - - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ++j; - } - //print duration difference between overallStartTime and overallEndTime - auto overallEndTime = time_point_cast(latestEndTime); - auto overallStartTime = time_point_cast(earliestStartTime); - auto totalInferenceDuration = overallEndTime - overallStartTime; - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << totalInferenceDuration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } - // Asynchronous execution using std::launch::async - else - { - try - { - ARMNN_LOG(info) << "Asynchronous Execution with std::launch:async... \n"; - std::vector>>> inferenceResults; - inferenceResults.reserve(params.m_Iterations); - - // Create WorkingMemHandles for each inference - std::vector> workingMemHandles; - workingMemHandles.reserve(params.m_Iterations); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - workingMemHandles.push_back(model.CreateWorkingMemHandle()); - } - - // Run each inference in its own thread - // start a timer - const auto start_time = armnn::GetTimeNow(); - for (unsigned int i = 0; i < params.m_Iterations; ++i) - { - armnn::experimental::IWorkingMemHandle& workingMemHandleRef = *workingMemHandles[i].get(); - - inferenceResults.push_back(std::async( - std::launch::async, [&model, &workingMemHandleRef, &inputs, &outputs, i]() { - return model.RunAsync(workingMemHandleRef, inputs[i], outputs[i], i); - } - )); - } - - // Check the results - for (unsigned int j = 0; j < inferenceResults.size(); ++j) - { - // Get the results - auto inferenceResult = inferenceResults[j].get(); - auto inferenceDuration = std::get<1>(inferenceResult); - auto inferenceID = std::get<0>(inferenceResult); - - if (params.m_GenerateTensorData) - { - ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful"; - } - if (params.m_DontPrintOutputs) - { - ARMNN_LOG(info) << "Printing outputs to console is disabled."; - } - - // Print output tensors - const auto& infosOut = model.GetOutputBindingInfos(); - for (size_t i = 0; i < numOutputs; i++) - { - // We've made sure before that the number of output files either equals numOutputs, in which - // case we override those files when processing the results of each iteration (only the - // result of the last iteration will be stored), or there are enough - // output files for each output of each iteration. - size_t outputFileIndex = j * numOutputs + i; - if (!params.m_OutputTensorFiles.empty()) - { - outputFileIndex = outputFileIndex % params.m_OutputTensorFiles.size(); - ARMNN_LOG(info) << "Writing output " << i << " named: '" - << inferenceModelParams.m_OutputBindings[i] - << "' of iteration: " << j+1 << " to file: '" - << params.m_OutputTensorFiles[outputFileIndex] << "'"; - } - const armnn::TensorInfo& infoOut = infosOut[i].second; - auto outputTensorFile = params.m_OutputTensorFiles.empty() - ? 
"" - : params.m_OutputTensorFiles[outputFileIndex]; - - TensorPrinter printer(inferenceModelParams.m_OutputBindings[i], - infoOut, - outputTensorFile, - params.m_DequantizeOutput, - !params.m_DontPrintOutputs); - mapbox::util::apply_visitor(printer, outputs[j][i]); - } - CheckInferenceTimeThreshold(inferenceDuration, params.m_ThresholdTime); - ARMNN_LOG(info) << "Asynchronous Execution is finished for Inference ID: " << inferenceID << " \n"; - } - // finish timer - const auto duration = armnn::GetTimeDuration(start_time); - ARMNN_LOG(info) << "\nOverall Inference time: " << std::setprecision(2) - << std::fixed << duration.count() << " ms\n"; - } - catch (const armnn::Exception& e) - { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; - } - } } - catch (const armnn::Exception& e) + else { - ARMNN_LOG(fatal) << "Armnn Error: " << e.what(); - return EXIT_FAILURE; + return std::make_unique(programOptions.m_ExNetParams, programOptions.m_RuntimeOptions); } - - return EXIT_SUCCESS; } // MAIN int main(int argc, const char* argv[]) { // Configures logging for both the ARMNN library and this test program. - #ifdef NDEBUG +#ifdef NDEBUG armnn::LogSeverity level = armnn::LogSeverity::Info; - #else +#else armnn::LogSeverity level = armnn::LogSeverity::Debug; - #endif +#endif armnn::ConfigureLogging(true, true, level); // Get ExecuteNetwork parameters and runtime options from command line // This might throw an InvalidArgumentException if the user provided invalid inputs - ProgramOptions ProgramOptions; - try { - ProgramOptions.ParseOptions(argc, argv); - } catch (const std::exception &e){ - ARMNN_LOG(fatal) << e.what(); - return EXIT_FAILURE; + ProgramOptions programOptions; + try + { + programOptions.ParseOptions(argc, argv); } - - if ((ProgramOptions.m_ExNetParams.m_OutputDetailsToStdOut || - ProgramOptions.m_ExNetParams.m_OutputDetailsOnlyToStdOut) - && !ProgramOptions.m_ExNetParams.m_EnableProfiling) + catch (const std::exception& e) { - ARMNN_LOG(fatal) << "You must enable profiling if you would like to output layer details"; + ARMNN_LOG(fatal) << e.what(); return EXIT_FAILURE; } - std::string modelFormat = ProgramOptions.m_ExNetParams.m_ModelFormat; + std::vector outputResults; - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { - #if defined(ARMNN_SERIALIZER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with serialization support."; - return EXIT_FAILURE; - #endif - } - else if (modelFormat.find("onnx") != std::string::npos) + auto executor = BuildExecutor(programOptions); + if (!executor) { - #if defined(ARMNN_ONNX_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with Onnx parser support."; return EXIT_FAILURE; - #endif } - else if(modelFormat.find("tflite") != std::string::npos) + + executor->PrintNetworkInfo(); + outputResults = executor->Execute(); + + if (!programOptions.m_ExNetParams.m_ComparisonComputeDevices.empty() || + programOptions.m_ExNetParams.m_CompareWithTflite) { - if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteParser) + ExecuteNetworkParams comparisonParams = programOptions.m_ExNetParams; + comparisonParams.m_ComputeDevices = 
programOptions.m_ExNetParams.m_ComparisonComputeDevices; + + if (programOptions.m_ExNetParams.m_CompareWithTflite) { - #if defined(ARMNN_TF_LITE_PARSER) - std::shared_ptr runtime(armnn::IRuntime::Create(ProgramOptions.m_RuntimeOptions)); - return MainImpl(ProgramOptions.m_ExNetParams, runtime); - #else - ARMNN_LOG(fatal) << "Not built with Tensorflow-Lite parser support."; - return EXIT_FAILURE; - #endif + comparisonParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter; } - else if (ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate || - ProgramOptions.m_ExNetParams.m_TfLiteExecutor == - ExecuteNetworkParams::TfLiteExecutor::TfliteInterpreter) + + auto comparisonExecutor = BuildExecutor(programOptions); + + if (!comparisonExecutor) { - #if defined(ARMNN_TFLITE_DELEGATE) - return TfLiteDelegateMainImpl(ProgramOptions.m_ExNetParams, ProgramOptions.m_RuntimeOptions); - #else - ARMNN_LOG(fatal) << "Not built with Arm NN Tensorflow-Lite delegate support."; return EXIT_FAILURE; - #endif } - } - else - { - ARMNN_LOG(fatal) << "Unknown model format: '" << modelFormat - << "'. Please include 'tflite' or 'onnx'"; - return EXIT_FAILURE; + + comparisonExecutor->PrintNetworkInfo(); + comparisonExecutor->Execute(); + + comparisonExecutor->CompareAndPrintResult(outputResults); } } diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp index cc75bb4323..f341c30738 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp @@ -1,76 +1,15 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkParams.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include #include #include - -bool IsModelBinary(const std::string& modelFormat) -{ - // Parse model binary flag from the model-format string we got from the command-line - if (modelFormat.find("binary") != std::string::npos) - { - return true; - } - else if (modelFormat.find("txt") != std::string::npos || modelFormat.find("text") != std::string::npos) - { - return false; - } - else - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. " - "Please include 'binary' or 'text'", - modelFormat)); - } -} - -void CheckModelFormat(const std::string& modelFormat) -{ - // Forward to implementation based on the parser type - if (modelFormat.find("armnn") != std::string::npos) - { -#if defined(ARMNN_SERIALIZER) -#else - throw armnn::InvalidArgumentException("Can't run model in armnn format without a " - "built with serialization support."); -#endif - } - else if (modelFormat.find("onnx") != std::string::npos) - { -#if defined(ARMNN_ONNX_PARSER) -#else - throw armnn::InvalidArgumentException("Can't run model in onnx format without a " - "built with Onnx parser support."); -#endif - } - else if (modelFormat.find("tflite") != std::string::npos) - { -#if defined(ARMNN_TF_LITE_PARSER) - if (!IsModelBinary(modelFormat)) - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. 
Only 'binary' " - "format supported for tflite files", - modelFormat)); - } -#elif defined(ARMNN_TFLITE_DELEGATE) -#else - throw armnn::InvalidArgumentException("Can't run model in tflite format without a " - "built with Tensorflow Lite parser support."); -#endif - } - else - { - throw armnn::InvalidArgumentException(fmt::format("Unknown model format: '{}'. " - "Please include 'tflite' or 'onnx'", - modelFormat)); - } -} +#include void CheckClTuningParameter(const int& tuningLevel, const std::string& tuningPath, @@ -105,7 +44,6 @@ void CheckClTuningParameter(const int& tuningLevel, ARMNN_LOG(warning) << "To use Cl Tuning the compute device GpuAcc needs to be active."; } } - } void ExecuteNetworkParams::ValidateParams() @@ -120,7 +58,6 @@ void ExecuteNetworkParams::ValidateParams() << invalidBackends; } } - CheckClTuningParameter(m_TuningLevel, m_TuningPath, m_ComputeDevices); if (m_EnableBf16TurboMode && m_EnableFp16TurboMode) @@ -129,10 +66,6 @@ void ExecuteNetworkParams::ValidateParams() "enabled at the same time."); } - m_IsModelBinary = IsModelBinary(m_ModelFormat); - - CheckModelFormat(m_ModelFormat); - // Check input tensor shapes if ((m_InputTensorShapes.size() != 0) && (m_InputTensorShapes.size() != m_InputNames.size())) @@ -157,68 +90,6 @@ void ExecuteNetworkParams::ValidateParams() m_InputNames.size(), m_InputTensorDataFilePaths.size())); } - else if (m_InputTensorDataFilePaths.size() % m_InputNames.size() != 0) - { - throw armnn::InvalidArgumentException( - fmt::format("According to the number of input names the user provided the network has {} " - "inputs. The user specified {} input-tensor-data file paths which is not " - "divisible by the number of inputs.", - m_InputNames.size(), - m_InputTensorDataFilePaths.size())); - } - } - - if (m_InputTypes.size() == 0) - { - //Defaults the value of all inputs to "float" - m_InputTypes.assign(m_InputNames.size(), "float"); - } - else if ((m_InputTypes.size() != 0) && - (m_InputTypes.size() != m_InputNames.size())) - { - throw armnn::InvalidArgumentException("input-name and input-type must have the same amount of elements."); - } - - // Make sure that the number of input files given is divisible by the number of inputs of the model - if (!(m_InputTensorDataFilePaths.size() % m_InputNames.size() == 0)) - { - throw armnn::InvalidArgumentException( - fmt::format("The number of input-tensor-data files ({0}) is not divisible by the " - "number of inputs ({1} according to the number of input names).", - m_InputTensorDataFilePaths.size(), - m_InputNames.size())); - } - - if (m_OutputTypes.size() == 0) - { - //Defaults the value of all outputs to "float" - m_OutputTypes.assign(m_OutputNames.size(), "float"); - } - else if ((m_OutputTypes.size() != 0) && - (m_OutputTypes.size() != m_OutputNames.size())) - { - throw armnn::InvalidArgumentException("output-name and output-type must have the same amount of elements."); - } - - // Make sure that the number of output files given is equal to the number of outputs of the model - // or equal to the number of outputs of the model multiplied with the number of iterations - if (!m_OutputTensorFiles.empty()) - { - if ((m_OutputTensorFiles.size() != m_OutputNames.size()) && - (m_OutputTensorFiles.size() != m_OutputNames.size() * m_Iterations)) - { - std::stringstream errmsg; - auto numOutputs = m_OutputNames.size(); - throw armnn::InvalidArgumentException( - fmt::format("The user provided {0} output-tensor files. 
The only allowed number of output-tensor " - "files is the number of outputs of the network ({1} according to the number of " - "output names) or the number of outputs multiplied with the number of times the " - "network should be executed (NumOutputs * NumIterations = {1} * {2} = {3}).", - m_OutputTensorFiles.size(), - numOutputs, - m_Iterations, - numOutputs*m_Iterations)); - } } // Check that threshold time is not less than zero @@ -310,4 +181,5 @@ armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const return delegateOptions; } + #endif diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index 5ef2b6ea7c..e60e3b8877 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -16,8 +16,6 @@ /// Check ExecuteNetworkProgramOptions.cpp for a description of each parameter struct ExecuteNetworkParams { - using TensorShapePtr = std::unique_ptr; - enum class TfLiteExecutor { ArmNNTfLiteParser, @@ -25,50 +23,48 @@ struct ExecuteNetworkParams TfliteInterpreter }; - bool m_AllowExpandedDims; - std::string m_CachedNetworkFilePath; - std::vector m_ComputeDevices; - bool m_Concurrent; - bool m_DequantizeOutput; - std::string m_DynamicBackendsPath; - bool m_EnableBf16TurboMode; - bool m_EnableFastMath = false; - bool m_EnableFp16TurboMode; - bool m_EnableLayerDetails = false; - bool m_EnableProfiling; - bool m_GenerateTensorData; - bool m_InferOutputShape = false; - bool m_EnableDelegate = false; - std::vector m_InputNames; - std::vector m_InputTensorDataFilePaths; - std::vector m_InputTensorShapes; - std::vector m_InputTypes; - bool m_IsModelBinary; - size_t m_Iterations; - std::string m_ModelFormat; - std::string m_ModelPath; - unsigned int m_NumberOfThreads; - bool m_OutputDetailsToStdOut; - bool m_OutputDetailsOnlyToStdOut; - std::vector m_OutputNames; - std::vector m_OutputTensorFiles; - std::vector m_OutputTypes; - bool m_ParseUnsupported = false; - bool m_PrintIntermediate; - bool m_DontPrintOutputs; - bool m_QuantizeInput; - bool m_SaveCachedNetwork; - size_t m_SimultaneousIterations; - size_t m_SubgraphId; - double m_ThresholdTime; - int m_TuningLevel; - std::string m_TuningPath; - std::string m_MLGOTuningFilePath; - TfLiteExecutor m_TfLiteExecutor; - size_t m_ThreadPoolSize; - bool m_ImportInputsIfAligned; - bool m_ReuseBuffers; - + bool m_AllowExpandedDims; + std::string m_CachedNetworkFilePath; + std::vector m_ComputeDevices; + bool m_Concurrent; + bool m_DequantizeOutput; + std::string m_DynamicBackendsPath; + bool m_EnableBf16TurboMode; + bool m_EnableFastMath = false; + bool m_EnableFp16TurboMode; + bool m_EnableLayerDetails = false; + bool m_EnableProfiling; + bool m_GenerateTensorData; + bool m_InferOutputShape = false; + bool m_EnableDelegate = false; + bool m_IsModelBinary; + std::vector m_InputNames; + std::vector m_InputTensorDataFilePaths; + std::vector m_InputTensorShapes; + size_t m_Iterations; + std::string m_ModelPath; + unsigned int m_NumberOfThreads; + bool m_OutputDetailsToStdOut; + bool m_OutputDetailsOnlyToStdOut; + std::vector m_OutputNames; + std::vector m_OutputTensorFiles; + bool m_ParseUnsupported = false; + bool m_PrintIntermediate; + bool m_DontPrintOutputs; + bool m_QuantizeInput; + bool m_SaveCachedNetwork; + size_t m_SubgraphId; + double 
m_ThresholdTime; + int m_TuningLevel; + std::string m_TuningPath; + std::string m_MLGOTuningFilePath; + TfLiteExecutor m_TfLiteExecutor; + size_t m_ThreadPoolSize; + bool m_ImportInputsIfAligned; + bool m_ReuseBuffers; + std::string m_ComparisonFile; + std::vector m_ComparisonComputeDevices; + bool m_CompareWithTflite; // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index ad35092c1d..de7bc051c7 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -1,11 +1,10 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ExecuteNetworkProgramOptions.hpp" #include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" -#include "InferenceTest.hpp" #include #include @@ -51,8 +50,6 @@ void CheckOptionDependency(const cxxopts::ParseResult& result, void CheckOptionDependencies(const cxxopts::ParseResult& result) { - CheckOptionDependency(result, "model-path", "model-format"); - CheckOptionDependency(result, "input-tensor-shape", "model-path"); CheckOptionDependency(result, "tuning-level", "tuning-path"); } @@ -119,10 +116,8 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result) // For each option in option-group "a) Required std::vector requiredOptions{"compute", - "model-format", - "model-path", - "input-name", - "output-name"}; + "model-path" + }; bool requiredMissing = false; for(auto const& str : requiredOptions) @@ -141,16 +136,42 @@ void CheckRequiredOptions(const cxxopts::ParseResult& result) void CheckForDeprecatedOptions(const cxxopts::ParseResult& result) { - if(result.count("simultaneous-iterations") > 0) - { - ARMNN_LOG(warning) << "DEPRECATED: The program option 'simultaneous-iterations' is deprecated and will be " - "removed soon. Please use the option 'iterations' combined with 'concurrent' instead."; - } if(result.count("armnn-tflite-delegate") > 0) { ARMNN_LOG(warning) << "DEPRECATED: The program option 'armnn-tflite-delegate' is deprecated and will be " "removed soon. Please use the option 'tflite-executor' instead."; } + if(result.count("concurrent") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'concurrent' is deprecated and will be " + "removed soon. Please use the option '\"P, thread-pool-size\"' instead."; + } + if(result.count("input-type") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-type' is deprecated and will be " + "removed soon. The input-types are now automatically set."; + } + if(result.count("input-name") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'input-name' is deprecated and will be " + "removed soon. The input-names are now automatically set."; + } + if(result.count("output-type") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-type' is deprecated and will be " + "removed soon. The output-types are now automatically set."; + } + if(result.count("output-name") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'output-name' is deprecated and will be " + "removed soon. The output-names are now automatically set."; + } + if(result.count("model-format") > 0) + { + ARMNN_LOG(warning) << "DEPRECATED: The program option 'model-format' is deprecated and will be " + "removed soon. 
The model-format is now automatically set."; + } + } void ProgramOptions::ValidateExecuteNetworkParams() @@ -182,12 +203,14 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "you can specify a second or third to fall back on. Possible choices: " + armnn::BackendRegistryInstance().GetBackendIdsAsString() + " NOTE: Multiple compute devices need to be passed as a comma separated list without whitespaces " - "e.g. GpuAcc,CpuAcc,CpuRef or by repeating the program option e.g. '-c Cpuacc -c CpuRef'. " + "e.g. GpuAcc,CpuAcc,CpuRef or by repeating the program option e.g. '-c CpuAcc -c CpuRef'. " "Duplicates are ignored.", cxxopts::value>()) ("f,model-format", - "armnn-binary, onnx-binary, onnx-text, tflite-binary", + "armnn-binary, onnx-binary, onnx-text, tflite-binary" + "DEPRECATED: The program option 'model-format' is deprecated and will be " + "removed soon. The model-format is now automatically set.", cxxopts::value()) ("m,model-path", @@ -195,11 +218,13 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_ModelPath)) ("i,input-name", - "Identifier of the input tensors in the network separated by comma.", + "Identifier of the input tensors in the network separated by comma." + "This option is not required, but can be used to set the order of inputs", cxxopts::value()) ("o,output-name", - "Identifier of the output tensors in the network separated by comma.", + "Identifier of the output tensors in the network separated by comma." + "This option is not required, but can be used to set the order of outputs", cxxopts::value()); m_CxxOptions.add_options("b) General") @@ -208,10 +233,16 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If left empty (the default), dynamic backends will not be used.", cxxopts::value(m_RuntimeOptions.m_DynamicBackendsPath)) + ("P, thread-pool-size", + "Run the network using the Arm NN thread pool with the number of threads provided. ", + cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")) + ("n,concurrent", "This option is for Arm NN internal asynchronous testing purposes. " "False by default. If set to true will use std::launch::async or the Arm NN thread pool, " - "if 'thread-pool-size' is greater than 0, for asynchronous execution.", + "if 'thread-pool-size' is greater than 0, for asynchronous execution." + "DEPRECATED: The program option 'concurrent' is deprecated and will be " + "removed soon. Please use the option '\"P, thread-pool-size\"' instead.", cxxopts::value(m_ExNetParams.m_Concurrent)->default_value("false")->implicit_value("true")) ("d,input-tensor-data", @@ -233,9 +264,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "still match. This is an Experimental parameter that is incompatible with infer-output-shape. " "This parameter may be removed in a later update. ", cxxopts::value(m_ExNetParams.m_AllowExpandedDims)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) - ("iterations", + ("I,iterations", "Number of iterations to run the network for, default is set to 1. " "If you wish to run the model with different input data for every execution you can do so by " "supplying more input file paths to the 'input-tensor-data' option. 
" @@ -256,7 +287,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("p,print-intermediate-layers", "If this option is enabled, the output of every graph layer will be printed.", cxxopts::value(m_ExNetParams.m_PrintIntermediate)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("parse-unsupported", "Add unsupported operators as stand-in layers (where supported by parser)", @@ -272,6 +303,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If unset, default to not quantized. Accepted values (true or false)" " (Not available when executing ArmNNTfLiteDelegate or TfliteInterpreter)", cxxopts::value(m_ExNetParams.m_QuantizeInput)->default_value("false")->implicit_value("true")) + ("r,threshold-time", "Threshold time is the maximum allowed time for inference measured in milliseconds. If the actual " "inference time is greater than the threshold time, the test will fail. By default, no threshold " @@ -286,7 +318,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("v,visualize-optimized-model", "Enables built optimized model visualizer. If unset, defaults to off.", cxxopts::value(m_ExNetParams.m_EnableLayerDetails)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("w,write-outputs-to-file", "Comma-separated list of output file paths keyed with the binding-id of the output slot. " @@ -301,13 +333,17 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("y,input-type", "The type of the input tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined inputs. " - "Accepted values (float, int, qasymms8 or qasymmu8).", + "Accepted values (float, int, qasymms8 or qasymmu8)." + "DEPRECATED: The program option 'input-type' is deprecated and will be " + "removed soon. The input-types are now automatically set.", cxxopts::value()) ("z,output-type", "The type of the output tensors in the network separated by comma. " "If unset, defaults to \"float\" for all defined outputs. " - "Accepted values (float, int, qasymms8 or qasymmu8).", + "Accepted values (float, int, qasymms8 or qasymmu8)." + "DEPRECATED: The program option 'output-type' is deprecated and will be " + "removed soon. The output-types are now automatically set.", cxxopts::value()) ("T,tflite-executor", @@ -317,30 +353,27 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "tflite is the TfliteInterpreter", cxxopts::value()->default_value("parser")) - ("D,armnn-tflite-delegate", - "Enable Arm NN TfLite delegate. " - "DEPRECATED: This option is deprecated please use tflite-executor instead", - cxxopts::value(m_ExNetParams.m_EnableDelegate)->default_value("false")->implicit_value("true")) - - ("simultaneous-iterations", - "Number of simultaneous iterations to async-run the network for, default is set to 1 (disabled). " - "When thread-pool-size is set the Arm NN thread pool is used. Otherwise std::launch::async is used." - "DEPRECATED: This option is deprecated and will be removed soon. " - "Please use the option 'iterations' combined with 'concurrent' instead.", - cxxopts::value(m_ExNetParams.m_SimultaneousIterations)->default_value("1")) - - ("thread-pool-size", + ("C, compare-output", "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. " "The default is set to 0 which equals disabled. 
If 'thread-pool-size' is greater than 0 the " "'concurrent' option is automatically set to true.", - cxxopts::value(m_ExNetParams.m_ThreadPoolSize)->default_value("0")); + cxxopts::value(m_ExNetParams.m_ComparisonFile)) + + ("B, compare-output-with-backend", + "Compare the output of the network with a different backend.", + cxxopts::value>()) + + ("A, compare-with-tflite", + "Compare the output of the network with the tflite ref model.", + cxxopts::value(m_ExNetParams.m_CompareWithTflite)->default_value("false") + ->implicit_value("true")); m_CxxOptions.add_options("c) Optimization") ("bf16-turbo-mode", "If this option is enabled, FP32 layers, " "weights and biases will be converted to BFloat16 where the backend supports it", cxxopts::value(m_ExNetParams.m_EnableBf16TurboMode) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("enable-fast-math", "Enables fast_math options in backends that support it. Using the fast_math flag can lead to " @@ -357,7 +390,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "Enables saving of the cached network to a file given with the cached-network-filepath option. " "See also --cached-network-filepath", cxxopts::value(m_ExNetParams.m_SaveCachedNetwork) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("cached-network-filepath", "If non-empty, the given file will be used to load/save the cached network. " @@ -371,7 +404,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", "If this option is enabled, FP32 layers, " "weights and biases will be converted to FP16 where the backend supports it", cxxopts::value(m_ExNetParams.m_EnableFp16TurboMode) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("tuning-level", "Sets the tuning level which enables a tuning run which will update/create a tuning file. " @@ -384,12 +417,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", cxxopts::value(m_ExNetParams.m_TuningPath)) ("MLGOTuningFilePath", - "Path to tuning file. Enables use of CL MLGO tuning", - cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)) + "Path to tuning file. 
Enables use of CL MLGO tuning", + cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)) ("R, reuse-buffers", - "If enabled then the IO buffers will be reused for each inference", - cxxopts::value(m_ExNetParams.m_ReuseBuffers)->default_value("false")->implicit_value("true")); + "If enabled then the IO buffers will be reused for each inference", + cxxopts::value(m_ExNetParams.m_ReuseBuffers)->default_value("false")->implicit_value("true")); m_CxxOptions.add_options("d) Profiling") ("a,enable-external-profiling", @@ -404,7 +437,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("g,file-only-external-profiling", "If enabled then the 'file-only' test mode of external profiling will be enabled", cxxopts::value(m_RuntimeOptions.m_ProfilingOptions.m_FileOnly) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("file-format", "If profiling is enabled specifies the output file format", @@ -421,7 +454,7 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("timeline-profiling", "If enabled timeline profiling will be switched on, requires external profiling", cxxopts::value(m_RuntimeOptions.m_ProfilingOptions.m_TimelineEnabled) - ->default_value("false")->implicit_value("true")) + ->default_value("false")->implicit_value("true")) ("u,counter-capture-period", "If profiling is enabled in 'file-only' mode this is the capture period that will be used in the test", @@ -430,11 +463,12 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("output-network-details", "Outputs layer tensor infos and descriptors to std out along with profiling events. Defaults to off.", cxxopts::value(m_ExNetParams.m_OutputDetailsToStdOut)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) + ("output-network-details-only", "Outputs layer tensor infos and descriptors to std out without profiling events. Defaults to off.", cxxopts::value(m_ExNetParams.m_OutputDetailsOnlyToStdOut)->default_value("false") - ->implicit_value("true")) + ->implicit_value("true")) ("import-inputs-if-aligned", "In & Out tensors will be imported per inference if the memory alignment allows. 
Defaults to false.", @@ -469,25 +503,25 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) CheckOptionDependencies(m_CxxResult); CheckForDeprecatedOptions(m_CxxResult); + if ((m_ExNetParams.m_OutputDetailsToStdOut || + m_ExNetParams.m_OutputDetailsOnlyToStdOut) && + !m_ExNetParams.m_EnableProfiling) + { + throw cxxopts::OptionParseException("You must enable profiling if you would like to output layer details"); + } + // Some options can't be assigned directly because they need some post-processing: auto computeDevices = GetOptionValue>("compute", m_CxxResult); m_ExNetParams.m_ComputeDevices = GetBackendIDs(computeDevices); - m_ExNetParams.m_ModelFormat = - armnn::stringUtils::StringTrimCopy(GetOptionValue("model-format", m_CxxResult)); m_ExNetParams.m_InputNames = ParseStringList(GetOptionValue("input-name", m_CxxResult), ","); m_ExNetParams.m_InputTensorDataFilePaths = ParseStringList(GetOptionValue("input-tensor-data", m_CxxResult), ","); m_ExNetParams.m_OutputNames = ParseStringList(GetOptionValue("output-name", m_CxxResult), ","); - m_ExNetParams.m_InputTypes = - ParseStringList(GetOptionValue("input-type", m_CxxResult), ","); - m_ExNetParams.m_OutputTypes = - ParseStringList(GetOptionValue("output-type", m_CxxResult), ","); m_ExNetParams.m_OutputTensorFiles = ParseStringList(GetOptionValue("write-outputs-to-file", m_CxxResult), ","); - m_ExNetParams.m_GenerateTensorData = - m_ExNetParams.m_InputTensorDataFilePaths.empty(); + m_ExNetParams.m_GenerateTensorData = m_ExNetParams.m_InputTensorDataFilePaths.empty(); m_ExNetParams.m_DynamicBackendsPath = m_RuntimeOptions.m_DynamicBackendsPath; m_RuntimeOptions.m_EnableGpuProfiling = m_ExNetParams.m_EnableProfiling; @@ -517,13 +551,13 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) { m_ExNetParams.m_TfLiteExecutor = ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate; } - if (m_ExNetParams.m_SimultaneousIterations > 1) + + // Set concurrent to true if the user expects to run inferences asynchronously + if (m_ExNetParams.m_Concurrent) { - m_ExNetParams.m_Iterations = m_ExNetParams.m_SimultaneousIterations; - m_ExNetParams.m_Concurrent = true; + m_ExNetParams.m_ThreadPoolSize = 1; } - // Set concurrent to true if the user expects to run inferences asynchronously if (m_ExNetParams.m_ThreadPoolSize > 0) { m_ExNetParams.m_Concurrent = true; @@ -543,7 +577,7 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) std::vector dims = ParseArray(ss); m_ExNetParams.m_InputTensorShapes.push_back( - std::make_unique(static_cast(dims.size()), dims.data())); + armnn::TensorShape{static_cast(dims.size()), dims.data()}); } } @@ -568,5 +602,12 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) } ValidateRuntimeOptions(); + + auto comparisonComputDevices = GetOptionValue>("compare-output-with-backend", m_CxxResult); + + if (!comparisonComputDevices.empty()) + { + m_ExNetParams.m_ComparisonComputeDevices = GetBackendIDs(comparisonComputDevices); + } } diff --git a/tests/ExecuteNetwork/IExecutor.hpp b/tests/ExecuteNetwork/IExecutor.hpp new file mode 100644 index 0000000000..4ed6cbde84 --- /dev/null +++ b/tests/ExecuteNetwork/IExecutor.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once +#include + +/// IExecutor executes a network +class IExecutor +{ +public: + /// Execute the given network + /// @return std::vector A type erased vector of the outputs, + /// that can be compared with the output of another IExecutor + virtual std::vector Execute() = 0; + /// Print available information about the network + virtual void PrintNetworkInfo() = 0; + /// Compare the output with the result of another IExecutor + virtual void CompareAndPrintResult(std::vector otherOutput) = 0; + virtual ~IExecutor(){}; +}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.cpp b/tests/ExecuteNetwork/TfliteExecutor.cpp new file mode 100644 index 0000000000..dc495be5c3 --- /dev/null +++ b/tests/ExecuteNetwork/TfliteExecutor.cpp @@ -0,0 +1,251 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TfliteExecutor.hpp" + +TfLiteExecutor::TfLiteExecutor(const ExecuteNetworkParams& params) : m_Params(params) +{ + std::unique_ptr model = + tflite::FlatBufferModel::BuildFromFile(m_Params.m_ModelPath.c_str()); + + m_TfLiteInterpreter = std::make_unique(); + tflite::ops::builtin::BuiltinOpResolver resolver; + + tflite::InterpreterBuilder builder(*model, resolver); + builder(&m_TfLiteInterpreter); + m_TfLiteInterpreter->AllocateTensors(); + + int status = kTfLiteError; + if (m_Params.m_TfLiteExecutor == ExecuteNetworkParams::TfLiteExecutor::ArmNNTfLiteDelegate) + { + // Create the Armnn Delegate + // Populate a DelegateOptions from the ExecuteNetworkParams. + armnnDelegate::DelegateOptions delegateOptions = m_Params.ToDelegateOptions(); + delegateOptions.SetExternalProfilingParams(delegateOptions.GetExternalProfilingParams()); + + std::unique_ptr + theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions), + armnnDelegate::TfLiteArmnnDelegateDelete); + // Register armnn_delegate to TfLiteInterpreter + status = m_TfLiteInterpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate)); + if (status == kTfLiteError) + { + LogAndThrow("Could not register ArmNN TfLite Delegate to TfLiteInterpreter"); + } + } + else + { + std::cout << "Running on TfLite without ArmNN delegate\n"; + } + + armnn::Optional dataFile = m_Params.m_GenerateTensorData + ? 
armnn::EmptyOptional() + : armnn::MakeOptional(m_Params.m_InputTensorDataFilePaths[0]); + + const size_t numInputs = m_Params.m_InputNames.size(); + + for(unsigned int inputIndex = 0; inputIndex < numInputs; ++inputIndex) + { + int input = m_TfLiteInterpreter->inputs()[inputIndex]; + + TfLiteIntArray* inputDims = m_TfLiteInterpreter->tensor(input)->dims; + + unsigned int inputSize = 1; + for (unsigned int dim = 0; dim < static_cast(inputDims->size); ++dim) + { + inputSize *= inputDims->data[dim]; + } + + const auto& inputName = m_TfLiteInterpreter->input_tensor(input)->name; + const auto& dataType = m_TfLiteInterpreter->input_tensor(input)->type; + + switch (dataType) + { + case kTfLiteFloat32: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt32: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteUInt8: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt16: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + case kTfLiteInt8: + { + auto inputData = m_TfLiteInterpreter->typed_tensor(input); + PopulateTensorWithData(inputData, inputSize, dataFile, inputName); + break; + } + default: + { + LogAndThrow("Unsupported input tensor data type"); + } + } + } +} + +std::vector TfLiteExecutor::Execute() +{ + int status = 0; + std::vector results; + for (size_t x = 0; x < m_Params.m_Iterations; x++) + { + // Start timer to record inference time in milliseconds. + const auto start_time = armnn::GetTimeNow(); + // Run the inference + status = m_TfLiteInterpreter->Invoke(); + const auto duration = armnn::GetTimeDuration(start_time); + + if (m_Params.m_DontPrintOutputs || m_Params.m_ReuseBuffers) + { + break; + } + // Print out the output + for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) + { + auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; + TfLiteIntArray* outputDims = m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->dims; + // If we've been asked to write to a file then set a file output stream. Otherwise use stdout. + FILE* outputTensorFile = stdout; + if (!m_Params.m_OutputTensorFiles.empty()) + { + outputTensorFile = fopen(m_Params.m_OutputTensorFiles[outputIndex].c_str(), "w"); + if (outputTensorFile == NULL) + { + LogAndThrow("Specified output tensor file, \"" + m_Params.m_OutputTensorFiles[outputIndex] + + "\", cannot be created. Defaulting to stdout. 
Error was: " + std::strerror(errno)); + } + else + { + ARMNN_LOG(info) << "Writing output " << outputIndex << "' of iteration: " << x+1 << " to file: '" + << m_Params.m_OutputTensorFiles[outputIndex] << "'"; + } + } + long outputSize = 1; + for (unsigned int dim = 0; dim < static_cast(outputDims->size); ++dim) + { + outputSize *= outputDims->data[dim]; + } + + std::cout << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name << ": "; + results.push_back(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation); + + switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) + { + + case kTfLiteFloat32: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%f ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteInt32: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteUInt8: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%u ", tfLiteDelageOutputData[i]); + } + break; + } + case kTfLiteInt8: + { + auto tfLiteDelageOutputData = m_TfLiteInterpreter->typed_tensor(tfLiteDelegateOutputId); + for (int i = 0; i < outputSize; ++i) + { + fprintf(outputTensorFile, "%d ", tfLiteDelageOutputData[i]); + } + break; + } + default: + { + LogAndThrow("Unsupported output type"); + } + } + + std::cout << std::endl; + } + CheckInferenceTimeThreshold(duration, m_Params.m_ThresholdTime); + } + + std::cout << status; + return results; +} + +void TfLiteExecutor::CompareAndPrintResult(std::vector otherOutput) +{ + for (unsigned int outputIndex = 0; outputIndex < m_TfLiteInterpreter->outputs().size(); ++outputIndex) + { + auto tfLiteDelegateOutputId = m_TfLiteInterpreter->outputs()[outputIndex]; + float result = 0; + switch (m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->type) + { + case kTfLiteFloat32: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + + break; + } + case kTfLiteInt32: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + case kTfLiteUInt8: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + case kTfLiteInt8: + { + result = ComputeRMSE(m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->allocation, + otherOutput[outputIndex], + m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->bytes); + break; + } + default: + { + } + } + + std::cout << "RMSE of " + << m_TfLiteInterpreter->tensor(tfLiteDelegateOutputId)->name + << ": " << result << std::endl; + } +}; diff --git a/tests/ExecuteNetwork/TfliteExecutor.hpp b/tests/ExecuteNetwork/TfliteExecutor.hpp new file mode 100644 index 0000000000..623d6357eb --- /dev/null +++ b/tests/ExecuteNetwork/TfliteExecutor.hpp @@ -0,0 +1,35 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include "IExecutor.hpp" +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" +#include "ExecuteNetworkProgramOptions.hpp" +#include "armnn/utility/NumericCast.hpp" +#include "armnn/utility/Timer.hpp" + +#include +#include + +#include +#include +#include + +using namespace tflite; +class TfLiteExecutor : public IExecutor +{ +public: + TfLiteExecutor(const ExecuteNetworkParams& m_Params); + + std::vector Execute() override; + void PrintNetworkInfo() override{}; + void CompareAndPrintResult(std::vector otherOutput) override; + +private: + std::unique_ptr m_Model; + const ExecuteNetworkParams& m_Params; + std::unique_ptr m_TfLiteInterpreter; +}; + diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp index 93716e1a6f..268f60301c 100644 --- a/tests/InferenceModel.hpp +++ b/tests/InferenceModel.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -15,6 +15,7 @@ #include #include +#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp" #include @@ -46,40 +47,6 @@ #include #include -namespace -{ - -inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, - armnn::Optional invalidBackendIds = armnn::EmptyOptional()) -{ - if (backendIds.empty()) - { - return false; - } - - armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); - - bool allValid = true; - for (const auto& backendId : backendIds) - { - if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) - { - allValid = false; - if (invalidBackendIds) - { - if (!invalidBackendIds.value().empty()) - { - invalidBackendIds.value() += ", "; - } - invalidBackendIds.value() += backendId; - } - } - } - return allValid; -} - -} // anonymous namespace - namespace InferenceModelInternal { using BindingPointInfo = armnn::BindingPointInfo; diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp index 6c74aaa6ed..e3c95d9312 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.cpp @@ -1,110 +1,12 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #include "NetworkExecutionUtils.hpp" #include -#include -#include - -#if defined(ARMNN_SERIALIZER) -#include "armnnDeserializer/IDeserializer.hpp" -#endif -#if defined(ARMNN_TF_LITE_PARSER) -#include "armnnTfLiteParser/ITfLiteParser.hpp" -#endif -#if defined(ARMNN_ONNX_PARSER) -#include "armnnOnnxParser/IOnnxParser.hpp" -#endif - -template -auto ParseDataArray(std::istream& stream); - -template -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset); - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, [](const std::string& s) { return std::stof(s); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, [](const std::string& s) { return std::stoi(s); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - - -template<> -auto ParseDataArray(std::istream& stream) -{ - return ParseArrayImpl(stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - -template<> -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset) -{ - return ParseArrayImpl(stream, - [&quantizationScale, &quantizationOffset](const std::string& s) - { - return armnn::numeric_cast( - armnn::Quantize(std::stof(s), - quantizationScale, - quantizationOffset)); - }); -} - -template<> -auto ParseDataArray(std::istream& stream, - const float& quantizationScale, - const int32_t& quantizationOffset) -{ - return ParseArrayImpl(stream, - [&quantizationScale, &quantizationOffset](const std::string& s) - { - return armnn::numeric_cast( - armnn::Quantize(std::stof(s), - quantizationScale, - quantizationOffset)); - }); -} - -template> -std::vector GenerateDummyTensorData(unsigned int numElements) -{ - return std::vector(numElements, static_cast(0)); -} - - -std::vector ParseArray(std::istream& stream) -{ - return ParseArrayImpl( - stream, - [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); -} - +#include std::vector ParseStringList(const std::string& inputString, const char* delimiter) { std::stringstream stream(inputString); @@ -112,189 +14,27 @@ std::vector ParseStringList(const std::string& inputString, const c return armnn::stringUtils::StringTrimCopy(s); }, delimiter); } - -TensorPrinter::TensorPrinter(const std::string& binding, - const armnn::TensorInfo& info, - const std::string& outputTensorFile, - bool dequantizeOutput, - const bool printToConsole) - : m_OutputBinding(binding) - , m_Scale(info.GetQuantizationScale()) - , m_Offset(info.GetQuantizationOffset()) - , m_OutputTensorFile(outputTensorFile) - , m_DequantizeOutput(dequantizeOutput) - , m_PrintToConsole(printToConsole) {} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](float value) - { - printf("%f ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -void TensorPrinter::operator()(const std::vector& values) +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime) { - if(m_DequantizeOutput) + ARMNN_LOG(info) << "Inference time: " << 
std::setprecision(2) + << std::fixed << duration.count() << " ms\n"; + // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line + if (thresholdTime != 0.0) { - auto& scale = m_Scale; - auto& offset = m_Offset; - std::vector dequantizedValues; - ForEachValue(values, [&scale, &offset, &dequantizedValues](uint8_t value) - { - auto dequantizedValue = armnn::Dequantize(value, scale, offset); - dequantizedValues.push_back(dequantizedValue); - }); - - if (m_PrintToConsole) + ARMNN_LOG(info) << "Threshold time: " << std::setprecision(2) + << std::fixed << thresholdTime << " ms"; + auto thresholdMinusInference = thresholdTime - duration.count(); + ARMNN_LOG(info) << "Threshold time - Inference time: " << std::setprecision(2) + << std::fixed << thresholdMinusInference << " ms" << "\n"; + if (thresholdMinusInference < 0) { - std::cout << m_OutputBinding << ": "; - ForEachValue(dequantizedValues, [](float value) - { - printf("%f ", value); - }); - printf("\n"); + std::string errorMessage = "Elapsed inference time is greater than provided threshold time."; + ARMNN_LOG(fatal) << errorMessage; + return false; } - - WriteToFile(dequantizedValues); - } - else - { - const std::vector intValues(values.begin(), values.end()); - operator()(intValues); } -} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](int8_t value) - { - printf("%d ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -void TensorPrinter::operator()(const std::vector& values) -{ - if (m_PrintToConsole) - { - std::cout << m_OutputBinding << ": "; - ForEachValue(values, [](int value) - { - printf("%d ", value); - }); - printf("\n"); - } - WriteToFile(values); -} - -template -void TensorPrinter::ForEachValue(const Container& c, Delegate delegate) -{ - for (const auto& value : c) - { - delegate(value); - } -} - -template -void TensorPrinter::WriteToFile(const std::vector& values) -{ - if (!m_OutputTensorFile.empty()) - { - std::ofstream outputTensorFile; - outputTensorFile.open(m_OutputTensorFile, std::ofstream::out | std::ofstream::trunc); - if (outputTensorFile.is_open()) - { - outputTensorFile << m_OutputBinding << ": "; - std::copy(values.begin(), values.end(), std::ostream_iterator(outputTensorFile, " ")); - } - else - { - ARMNN_LOG(info) << "Output Tensor File: " << m_OutputTensorFile << " could not be opened!"; - } - outputTensorFile.close(); - } -} - -void PopulateTensorWithData(armnnUtils::TContainer& tensorData, - unsigned int numElements, - const std::string& dataTypeStr, - const armnn::Optional& qParams, - const armnn::Optional& dataFile) -{ - const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); - const bool quantizeData = qParams.has_value(); - - std::ifstream inputTensorFile; - if (readFromFile) - { - inputTensorFile = std::ifstream(dataFile.value()); - } - - if (dataTypeStr.compare("float") == 0) - { - if (quantizeData) - { - const float qScale = qParams.value().first; - const int qOffset = qParams.value().second; - - tensorData = readFromFile ? - ParseDataArray(inputTensorFile, qScale, qOffset) : - GenerateDummyTensorData(numElements); - } - else - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - } - else if (dataTypeStr.compare("int") == 0) - { - tensorData = readFromFile ? 
- ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qsymms8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qasymm8") == 0 || dataTypeStr.compare("qasymmu8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else if (dataTypeStr.compare("qasymms8") == 0) - { - tensorData = readFromFile ? - ParseDataArray(inputTensorFile) : - GenerateDummyTensorData(numElements); - } - else - { - std::string errorMessage = "Unsupported tensor data type " + dataTypeStr; - ARMNN_LOG(fatal) << errorMessage; - - inputTensorFile.close(); - throw armnn::Exception(errorMessage); - } - - inputTensorFile.close(); + return true; } bool ValidatePath(const std::string& file, const bool expectFile) @@ -312,6 +52,13 @@ bool ValidatePath(const std::string& file, const bool expectFile) return true; } +std::vector ParseArray(std::istream& stream) +{ + return ParseArrayImpl( + stream, + [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }); +} + bool ValidatePaths(const std::vector& fileVec, const bool expectFile) { bool allPathsValid = true; @@ -325,5 +72,9 @@ bool ValidatePaths(const std::vector& fileVec, const bool expectFil return allPathsValid; } - +void LogAndThrow(std::string eMsg) +{ + ARMNN_LOG(error) << eMsg; + throw armnn::Exception(eMsg); +} diff --git a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp index bc2868ab35..14d7fe5551 100644 --- a/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp +++ b/tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp @@ -1,63 +1,83 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once -#include -#include #include #include -#include +#include +#include #include #include +#include +#include +/** + * Given a measured duration and a threshold time tell the user whether we succeeded or not. + * + * @param duration the measured inference duration. + * @param thresholdTime the threshold time in milliseconds. + * @return false if the measured time exceeded the threshold. 
+ */ +bool CheckInferenceTimeThreshold(const std::chrono::duration& duration, + const double& thresholdTime); -std::vector ParseArray(std::istream& stream); - -/// Splits a given string at every accurance of delimiter into a vector of string -std::vector ParseStringList(const std::string& inputString, const char* delimiter); - -struct TensorPrinter +inline bool CheckRequestedBackendsAreValid(const std::vector& backendIds, + armnn::Optional invalidBackendIds = armnn::EmptyOptional()) { - TensorPrinter(const std::string& binding, - const armnn::TensorInfo& info, - const std::string& outputTensorFile, - bool dequantizeOutput, - bool printToConsole = true); - - void operator()(const std::vector& values); - - void operator()(const std::vector& values); - - void operator()(const std::vector& values); + if (backendIds.empty()) + { + return false; + } - void operator()(const std::vector& values); + armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds(); -private: - template - void ForEachValue(const Container& c, Delegate delegate); + bool allValid = true; + for (const auto& backendId : backendIds) + { + if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end()) + { + allValid = false; + if (invalidBackendIds) + { + if (!invalidBackendIds.value().empty()) + { + invalidBackendIds.value() += ", "; + } + invalidBackendIds.value() += backendId; + } + } + } + return allValid; +} - template - void WriteToFile(const std::vector& values); +std::vector ParseArray(std::istream& stream); - std::string m_OutputBinding; - float m_Scale; - int m_Offset; - std::string m_OutputTensorFile; - bool m_DequantizeOutput; - bool m_PrintToConsole; -}; +/// Splits a given string at every accurance of delimiter into a vector of string +std::vector ParseStringList(const std::string& inputString, const char* delimiter); -using QuantizationParams = std::pair; +/// Dequantize an array of a given type +/// @param array Type erased array to dequantize +/// @param numElements Elements in the array +/// @param array Type erased array to dequantize +template +std::vector DequantizeArray(const void* array, unsigned int numElements, float scale, int32_t offset) +{ + const T* quantizedArray = reinterpret_cast(array); + std::vector dequantizedVector; + dequantizedVector.reserve(numElements); + for (unsigned int i = 0; i < numElements; ++i) + { + float f = armnn::Dequantize(*(quantizedArray + i), scale, offset); + dequantizedVector.push_back(f); + } + return dequantizedVector; +} -void PopulateTensorWithData(armnnUtils::TContainer& tensorData, - unsigned int numElements, - const std::string& dataTypeStr, - const armnn::Optional& qParams, - const armnn::Optional& dataFile); +void LogAndThrow(std::string eMsg); /** * Verifies if the given string is a valid path. Reports invalid paths to std::err. 
@@ -75,6 +95,152 @@ bool ValidatePath(const std::string& file, const bool expectFile); * */ bool ValidatePaths(const std::vector& fileVec, const bool expectFile); +/// Returns a function of read the given type as a string +template ::value>* = nullptr> +std::function GetParseElementFunc() +{ + return [](const std::string& s) { return armnn::numeric_cast(std::stoi(s)); }; +} + +template ::value>* = nullptr> +std::function GetParseElementFunc() +{ + return [](const std::string& s) { return std::stof(s); }; +} + +template +void PopulateTensorWithData(T* tensor, + const unsigned int numElements, + const armnn::Optional& dataFile, + const std::string& inputName) +{ + const bool readFromFile = dataFile.has_value() && !dataFile.value().empty(); + + std::ifstream inputTensorFile; + if (!readFromFile) + { + std::fill(tensor, tensor + numElements, 0); + return; + } + else + { + inputTensorFile = std::ifstream(dataFile.value()); + } + + auto parseElementFunc = GetParseElementFunc(); + std::string line; + unsigned int index = 0; + while (std::getline(inputTensorFile, line)) + { + std::vector tokens = armnn::stringUtils::StringTokenizer(line, "\t ,:"); + for (const std::string& token : tokens) + { + if (!token.empty()) // See https://stackoverflow.com/questions/10437406/ + { + try + { + if (index == numElements) + { + ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << dataFile.value() + << "\" does not match number of elements: " << numElements + << " for input \"" << inputName << "\"."; + } + *(tensor + index) = parseElementFunc(token); + index++; + } + catch (const std::exception&) + { + ARMNN_LOG(error) << "'" << token << "' is not a valid number. It has been ignored."; + } + } + } + } + + if (index != numElements) + { + ARMNN_LOG(error) << "Number of elements: " << (index +1) << " in file \"" << inputName + << "\" does not match number of elements: " << numElements + << " for input \"" << inputName << "\"."; + } +} + +template +void WriteToFile(const std::string& outputTensorFileName, + const std::string& outputName, + const T* const array, + const unsigned int numElements) +{ + std::ofstream outputTensorFile; + outputTensorFile.open(outputTensorFileName, std::ofstream::out | std::ofstream::trunc); + if (outputTensorFile.is_open()) + { + outputTensorFile << outputName << ": "; + std::copy(array, array + numElements, std::ostream_iterator(outputTensorFile, " ")); + } + else + { + ARMNN_LOG(info) << "Output Tensor File: " << outputTensorFileName << " could not be opened!"; + } + outputTensorFile.close(); +} + +struct OutputWriteInfo +{ + const armnn::Optional& m_OutputTensorFile; + const std::string& m_OutputName; + const armnn::Tensor& m_Tensor; + const bool m_PrintTensor; +}; + +template +void PrintTensor(OutputWriteInfo& info, const char* formatString) +{ + const T* array = reinterpret_cast(info.m_Tensor.GetMemoryArea()); + + if (info.m_OutputTensorFile.has_value()) + { + WriteToFile(info.m_OutputTensorFile.value(), + info.m_OutputName, + array, + info.m_Tensor.GetNumElements()); + } + + if (info.m_PrintTensor) + { + for (unsigned int i = 0; i < info.m_Tensor.GetNumElements(); i++) + { + printf(formatString, array[i]); + } + } +} + +template +void PrintQuantizedTensor(OutputWriteInfo& info) +{ + std::vector dequantizedValues; + auto tensor = info.m_Tensor; + dequantizedValues = DequantizeArray(tensor.GetMemoryArea(), + tensor.GetNumElements(), + tensor.GetInfo().GetQuantizationScale(), + tensor.GetInfo().GetQuantizationOffset()); + + if 
(info.m_OutputTensorFile.has_value())
+    {
+        WriteToFile(info.m_OutputTensorFile.value(),
+                    info.m_OutputName,
+                    dequantizedValues.data(),
+                    tensor.GetNumElements());
+    }
+
+    if (info.m_PrintTensor)
+    {
+        std::for_each(dequantizedValues.begin(), dequantizedValues.end(), [&](float value)
+        {
+            printf("%f ", value);
+        });
+    }
+}
+
 template<typename T, typename TParseElementFunc>
 std::vector<T> ParseArrayImpl(std::istream& stream, TParseElementFunc parseElementFunc, const char* chars = "\t ,:")
 {
@@ -103,21 +269,28 @@ std::vector<T> ParseArrayImpl(std::istream& stream, TParseElementFunc parseEleme
     return result;
 }
 
-template <typename T, typename TParseElementFunc>
-void PopulateTensorWithDataGeneric(std::vector<T>& tensorData,
-                                   unsigned int numElements,
-                                   const armnn::Optional<std::string>& dataFile,
-                                   TParseElementFunc parseFunction)
+/// Compute the root-mean-square error (RMSE) between two tensors of the same type and size
+/// @param expected pointer to the expected (reference) output data
+/// @param actual pointer to the actual output data
+/// @param size size of the tensors in bytes
+/// @return the RMSE as a float
+template <typename T>
+float ComputeRMSE(const void* expected, const void* actual, const size_t size)
 {
-    const bool readFromFile = dataFile.has_value() && !dataFile.value().empty();
+    auto typedExpected = reinterpret_cast<const T*>(expected);
+    auto typedActual = reinterpret_cast<const T*>(actual);
 
-    std::ifstream inputTensorFile;
-    if (readFromFile)
+    T errorSum = 0;
+
+    for (unsigned int i = 0; i < size / sizeof(T); i++)
     {
-        inputTensorFile = std::ifstream(dataFile.value());
+        errorSum += std::pow(std::abs(typedExpected[i] - typedActual[i]), 2);
     }
-    tensorData = readFromFile ?
-                 ParseArrayImpl<T>(inputTensorFile, parseFunction) :
-                 std::vector<T>(numElements, static_cast<T>(0));
-}
+
+    float rmse = std::sqrt(armnn::numeric_cast<float>(errorSum) / armnn::numeric_cast<float>(size / sizeof(T)));
+    return rmse;
+}
\ No newline at end of file
-- 
cgit v1.2.1
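
Editor's note (illustrative only, not part of the patch): the sketch below shows how the refactored helpers declared in tests/NetworkExecutionUtils/NetworkExecutionUtils.hpp might be called by an executor. The wrapper function CheckOutputs and the 20 ms threshold are hypothetical; only ComputeRMSE and CheckInferenceTimeThreshold come from the patch, and ComputeRMSE is assumed to take the buffer size in bytes, consistent with its size / sizeof(T) divisor.

// Illustrative usage sketch - assumes NetworkExecutionUtils.hpp from this patch is included.
#include <chrono>
#include <iostream>
#include <vector>

void CheckOutputs(const std::vector<float>& expected,                       // hypothetical helper
                  const std::vector<float>& actual,
                  std::chrono::duration<double, std::milli> inferenceTime)
{
    // ComputeRMSE is templated on the element type; the third argument is the size in bytes.
    float rmse = ComputeRMSE<float>(expected.data(),
                                    actual.data(),
                                    expected.size() * sizeof(float));
    std::cout << "RMSE: " << rmse << std::endl;

    // A threshold of 0.0 means "no threshold supplied"; a non-zero value can fail the run.
    if (!CheckInferenceTimeThreshold(inferenceTime, 20.0))                   // 20 ms is a made-up threshold
    {
        std::cout << "Inference exceeded the example threshold." << std::endl;
    }
}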