path: root/tests/ExecuteNetwork/ArmNNExecutor.cpp
Diffstat (limited to 'tests/ExecuteNetwork/ArmNNExecutor.cpp')
-rw-r--r--  tests/ExecuteNetwork/ArmNNExecutor.cpp  805
1 file changed, 805 insertions, 0 deletions
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
new file mode 100644
index 0000000000..5be3383061
--- /dev/null
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -0,0 +1,805 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+
+#include "ArmNNExecutor.hpp"
+#include "NetworkExecutionUtils/NetworkExecutionUtils.hpp"
+
+#include <armnn/IAsyncExecutionCallback.hpp>
+#include <AsyncExecutionCallback.hpp>
+
+
+using namespace armnn;
+using namespace std::chrono;
+
+ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime::CreationOptions runtimeOptions)
+: m_Params(params)
+{
+ runtimeOptions.m_EnableGpuProfiling = params.m_EnableProfiling;
+ runtimeOptions.m_DynamicBackendsPath = params.m_DynamicBackendsPath;
+ m_Runtime = armnn::IRuntime::Create(runtimeOptions);
+
+ auto parser = CreateParser();
+ auto network = parser->CreateNetwork(m_Params);
+ auto optNet = OptimizeNetwork(network.get());
+
+ m_IOInfo = GetIOInfo(optNet.get());
+ SetupInputsAndOutputs();
+
+ std::string errorMsg;
+
+ armnn::ProfilingDetailsMethod profilingDetailsMethod = ProfilingDetailsMethod::Undefined;
+ if (params.m_OutputDetailsOnlyToStdOut)
+ {
+ profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsOnly;
+ }
+ else if (params.m_OutputDetailsToStdOut)
+ {
+ profilingDetailsMethod = armnn::ProfilingDetailsMethod::DetailsWithEvents;
+ }
+
+ INetworkProperties networkProperties{m_Params.m_Concurrent,
+ MemorySource::Undefined,
+ MemorySource::Undefined,
+ params.m_EnableProfiling,
+ profilingDetailsMethod};
+
+ m_Runtime->LoadNetwork(m_NetworkId, std::move(optNet), errorMsg, networkProperties);
+
+ if (m_Params.m_Iterations > 1)
+ {
+ std::stringstream msg;
+ msg << "Network will be executed " << m_Params.m_Iterations;
+ if (m_Params.m_Concurrent)
+ {
+ msg << " times in an asynchronous manner. ";
+ }
+ else
+ {
+ msg << " times successively. ";
+ }
+ msg << "The input-tensor-data files will be reused recursively if the user didn't provide enough to "
+ "cover each execution.";
+ ARMNN_LOG(info) << msg.str();
+ }
+
+ if (m_Params.m_GenerateTensorData)
+ {
+ ARMNN_LOG(warning) << "The input data was generated, note that the output will not be useful";
+ }
+
+ if (m_Params.m_DontPrintOutputs)
+ {
+ ARMNN_LOG(info) << "Printing outputs to console is disabled.";
+ }
+}
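
// Illustrative sketch (not part of this file): a minimal caller for the executor defined above.
// The ExecuteNetworkParams fields set here are ones the constructor reads; real option parsing
// lives in ExecuteNetwork itself, and the model path used is hypothetical.
//
// static std::vector<const void*> RunOnceSketch()
// {
//     ExecuteNetworkParams params;
//     params.m_ModelPath      = "model.tflite";        // hypothetical model file
//     params.m_ComputeDevices = {"CpuAcc", "CpuRef"};   // backends in preference order
//     params.m_Iterations     = 1;
//
//     armnn::IRuntime::CreationOptions runtimeOptions;
//     ArmNNExecutor executor(params, runtimeOptions);
//
//     // Dispatches to ExecuteSync() or ExecuteAsync() depending on m_ThreadPoolSize
//     // and returns one pointer per output buffer.
//     return executor.Execute();
// }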
+
+void ArmNNExecutor::ExecuteAsync()
+{
+ std::vector<std::shared_ptr<armnn::IWorkingMemHandle>> memHandles;
+ std::unique_ptr<armnn::Threadpool> threadpool;
+ armnn::AsyncCallbackManager callbackManager;
+ std::unordered_map<armnn::InferenceId, const armnn::OutputTensors*> inferenceOutputMap;
+
+ for (size_t i = 0; i < m_Params.m_ThreadPoolSize; ++i)
+ {
+ memHandles.emplace_back(m_Runtime->CreateWorkingMemHandle(m_NetworkId));
+ }
+
+ threadpool = std::make_unique<armnn::Threadpool>(m_Params.m_ThreadPoolSize,
+ m_Runtime.get(),
+ memHandles);
+
+ ARMNN_LOG(info) << "Asynchronous Execution with Arm NN thread pool... \n";
+ // Declare the latest and earliest inference times here to be used when calculating overall time
+ std::chrono::high_resolution_clock::time_point earliestStartTime =
+ std::chrono::high_resolution_clock::time_point::max();
+ std::chrono::high_resolution_clock::time_point latestEndTime =
+ std::chrono::high_resolution_clock::now();
+
+ // For asynchronous execution we add a pool of working memory handles (one per thread) to the
+ // LoadedNetwork, and each scheduled inference is given a specific priority
+ for (size_t i = 0; i < m_Params.m_Iterations; ++i)
+ {
+ std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+
+ std::shared_ptr<armnn::AsyncExecutionCallback> cb = callbackManager.GetNewCallback();
+ inferenceOutputMap.insert({cb->GetInferenceId(), &m_OutputTensorsVec[i]});
+ threadpool->Schedule(m_NetworkId,
+ m_InputTensorsVec[i],
+ m_OutputTensorsVec[i],
+ armnn::QosExecPriority::Medium,
+ cb);
+ }
+
+ // Check the results
+ for (size_t iteration = 0; iteration < m_Params.m_Iterations; ++iteration)
+ {
+ auto cb = callbackManager.GetNotifiedCallback();
+
+ // Get the results
+ if (earliestStartTime > cb->GetStartTime())
+ {
+ earliestStartTime = cb->GetStartTime();
+ }
+ if (latestEndTime < cb->GetEndTime())
+ {
+ latestEndTime = cb->GetEndTime();
+ }
+
+ auto startTime = time_point_cast<std::chrono::milliseconds>(cb->GetStartTime());
+ auto endTime = time_point_cast<std::chrono::milliseconds>(cb->GetEndTime());
+ auto inferenceDuration = endTime - startTime;
+ CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
+ if(!m_Params.m_DontPrintOutputs)
+ {
+ const armnn::OutputTensors* out = inferenceOutputMap[cb->GetInferenceId()];
+ PrintOutputTensors(out, iteration);
+ }
+ }
+
+ // Print duration difference between overallStartTime and overallEndTime
+ auto overallEndTime = time_point_cast<std::chrono::milliseconds>(latestEndTime);
+ auto overallStartTime = time_point_cast<std::chrono::milliseconds>(earliestStartTime);
+ auto totalInferenceDuration = overallEndTime - overallStartTime;
+ ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
+ << std::fixed << totalInferenceDuration.count() << " ms\n";
+
+}
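
// Condensed view of the scheduling round trip above (a summary, not lines from this file):
// every Schedule() call is matched by exactly one GetNotifiedCallback(), and each returned
// callback carries the timing data used to compute the overall duration.
//
//     cb = callbackManager.GetNewCallback();
//     threadpool->Schedule(m_NetworkId, inputs[i], outputs[i], QosExecPriority::Medium, cb);
//     ...
//     cb = callbackManager.GetNotifiedCallback();          // one finished inference
//     duration_i = cb->GetEndTime() - cb->GetStartTime();
//     overall    = latest GetEndTime() - earliest GetStartTime();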
+
+void ArmNNExecutor::ExecuteSync()
+{
+ for (size_t x = 0; x < m_Params.m_Iterations; x++)
+ {
+ std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkId);
+
+ const auto start_time = armnn::GetTimeNow();
+ armnn::Status ret;
+ if (m_Params.m_ImportInputsIfAligned)
+ {
+ ret = m_Runtime->EnqueueWorkload(m_NetworkId,
+ m_InputTensorsVec[x],
+ m_OutputTensorsVec[x],
+ m_ImportedInputIds[x],
+ m_ImportedOutputIds[x]);
+ }
+ else
+ {
+ ret = m_Runtime->EnqueueWorkload(m_NetworkId,
+ m_InputTensorsVec[x],
+ m_OutputTensorsVec[x]);
+ }
+
+ const auto inferenceDuration = armnn::GetTimeDuration(start_time);
+
+ // If profiling is enabled print out the results
+ if(profiler && profiler->IsProfilingEnabled())
+ {
+ profiler->Print(std::cout);
+ }
+
+ if(ret == armnn::Status::Failure)
+ {
+ throw armnn::Exception("IRuntime::EnqueueWorkload failed");
+ }
+
+ if(!m_Params.m_DontPrintOutputs)
+ {
+ PrintOutputTensors(&m_OutputTensorsVec[x], x);
+ }
+
+ // If thresholdTime == 0.0 (default), then it hasn't been supplied at command line
+ CheckInferenceTimeThreshold(inferenceDuration, m_Params.m_ThresholdTime);
+ }
+}
+
+std::vector<const void*> ArmNNExecutor::Execute()
+{
+ if(m_Params.m_ThreadPoolSize == 0)
+ {
+ ExecuteSync();
+ }
+ else
+ {
+ ExecuteAsync();
+ }
+ std::vector<const void*> results;
+ for (auto& output : m_OutputStorage)
+ {
+ results.push_back(output.m_Mem);
+ }
+
+ return results;
+}
+
+void ArmNNExecutor::PrintNetworkInfo()
+{
+ const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
+ m_Params.m_InputNames :
+ m_IOInfo.m_InputNames;
+ std::stringstream ss;
+ ss << "===== Network Info =====\n";
+ ss << "Inputs in order:\n";
+ for (const auto& inputName : inputNames)
+ {
+ const auto inputInfo = m_IOInfo.m_InputInfoMap[inputName].second;
+ ss << inputName << ", " << inputInfo.GetShape() << ", " << GetDataTypeName(inputInfo.GetDataType());
+ if (inputInfo.IsQuantized())
+ {
+ ss << " Quantization Offset: " << inputInfo.GetQuantizationOffset();
+ if (inputInfo.HasMultipleQuantizationScales())
+ {
+ ss << " Quantization scales: ";
+ for (const auto scale: inputInfo.GetQuantizationScales())
+ {
+ ss << scale << ", ";
+ }
+ }
+ else
+ {
+ ss << " Quantization scale: " << inputInfo.GetQuantizationScale();
+ }
+ }
+ ss << "\n";
+ }
+
+ ss << "Outputs in order:\n";
+ for (const auto& outputName : m_IOInfo.m_OutputNames)
+ {
+ const auto outputInfo = m_IOInfo.m_OutputInfoMap[outputName].second;
+ ss << outputName << ", " << outputInfo.GetShape() << ", " << GetDataTypeName(outputInfo.GetDataType());
+ if (outputInfo.IsQuantized())
+ {
+ ss << " Quantization Offset: " << outputInfo.GetQuantizationOffset();
+ if (outputInfo.HasMultipleQuantizationScales())
+ {
+ ss << " Quantization scales: ";
+ for (const auto scale: outputInfo.GetQuantizationScales())
+ {
+ ss << scale << ", ";
+ }
+ }
+ else
+ {
+ ss << " Quantization scale: " << outputInfo.GetQuantizationScale();
+ }
+ }
+ ss << "\n";
+ }
+
+ std::cout << ss.str() << std::endl;
+}
+
+void ArmNNExecutor::SetupInputsAndOutputs()
+{
+ const unsigned int noOfInputs = m_IOInfo.m_InputNames.size();
+
+ if (m_Params.m_InputNames.size() != 0 && m_Params.m_InputNames.size() != noOfInputs)
+ {
+ LogAndThrow("Number of input names does not match number of inputs");
+ }
+
+ const unsigned int inputFilePaths = m_Params.m_InputTensorDataFilePaths.size();
+ const std::vector<std::string>& inputNames = m_Params.m_InputNames.size() != 0 ?
+ m_Params.m_InputNames :
+ m_IOInfo.m_InputNames;
+ unsigned int noInputSets = 1;
+
+ if (inputFilePaths != 0)
+ {
+ if (inputFilePaths % noOfInputs != 0)
+ {
+ LogAndThrow("Number of input files: " + std::to_string(inputFilePaths) +
+ " not compatible with number of inputs: " + std::to_string(noOfInputs));
+ }
+ noInputSets = inputFilePaths / noOfInputs;
+ if (noInputSets != 1 && m_Params.m_ReuseBuffers)
+ {
+ LogAndThrow("Specifying multiple sets of inputs not compatible with ReuseBuffers");
+ }
+ }
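// Worked example: a network with 2 inputs given 6 input files yields noInputSets == 3;
// each consecutive group of 2 files fills one complete set of input tensors, and any
// iterations beyond the third reuse existing sets (see the copy loop at the end of this
// function).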
+
+ const unsigned int noOfOutputs = m_IOInfo.m_OutputNames.size();
+ const unsigned int outputFilePaths = m_Params.m_OutputTensorFiles.size();
+ unsigned int noOutputSets = 1;
+
+ if (outputFilePaths != 0)
+ {
+ if (outputFilePaths % noOfOutputs != 0)
+ {
+ LogAndThrow("Number of output files: " + std::to_string(outputFilePaths) +
+ ", not compatible with number of outputs: " + std::to_string(noOfOutputs));
+ }
+ noOutputSets = outputFilePaths / noOfOutputs;
+
+ if (noOutputSets != 1 && m_Params.m_ReuseBuffers)
+ {
+ LogAndThrow("Specifying multiple sets of outputs not compatible with ReuseBuffers");
+ }
+ }
+
+ if (m_Params.m_ThreadPoolSize != 0)
+ {
+ // The current implementation of the Threadpool does not allow binding of outputs to a thread,
+ // so to ensure that no two threads write to the same output at the same time, no output can be reused.
+ noOutputSets = m_Params.m_Iterations;
+ }
+
+ if (m_Params.m_InputTensorDataFilePaths.size() > noOfInputs)
+ {
+ ARMNN_LOG(info) << "Given network has " << noOfInputs << " input/s. One input-tensor-data file is required "
+ << "for each input. The user provided "
+ << m_Params.m_InputTensorDataFilePaths.size()
+ << " input-tensor-data file/s which will be used to fill the input/s.\n";
+ }
+
+ unsigned int inputCount = 0;
+ for(unsigned int inputSet = 0; inputSet < noInputSets; ++inputSet)
+ {
+ armnn::InputTensors inputTensors;
+ for (const auto& inputName: inputNames)
+ {
+ armnn::BindingPointInfo bindingPointInfo;
+ try
+ {
+ bindingPointInfo = m_IOInfo.m_InputInfoMap.at(inputName);
+ }
+ catch (const std::out_of_range& e)
+ {
+ LogAndThrow("Input with inputName: " + inputName + " not found.");
+ }
+
+ const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
+ auto newInfo = armnn::TensorInfo{tensorInfo.GetShape(), tensorInfo.GetDataType(),
+ tensorInfo.GetQuantizationScale(),
+ tensorInfo.GetQuantizationOffset(),
+ true};
+
+ m_InputStorage.emplace_back(IOStorage{tensorInfo.GetNumBytes()});
+
+ const int bindingId = bindingPointInfo.first;
+ inputTensors.emplace_back(bindingId, armnn::ConstTensor{newInfo, m_InputStorage.back().m_Mem});
+
+ const armnn::Optional<std::string> dataFile = m_Params.m_GenerateTensorData ?
+ armnn::EmptyOptional() :
+ armnn::MakeOptional<std::string>(
+ m_Params.m_InputTensorDataFilePaths.at(inputCount++));
+
+ switch (tensorInfo.GetDataType())
+ {
+ case armnn::DataType::Float32:
+ {
+ auto typedTensor = reinterpret_cast<float*>(m_InputStorage.back().m_Mem);
+ PopulateTensorWithData<float>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
+ break;
+ }
+ case armnn::DataType::QSymmS16:
+ {
+ auto typedTensor = reinterpret_cast<int16_t*>(m_InputStorage.back().m_Mem);
+ PopulateTensorWithData<int16_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
+ break;
+ }
+ case armnn::DataType::QSymmS8:
+ case armnn::DataType::QAsymmS8:
+ {
+ auto typedTensor = reinterpret_cast<int8_t*>(m_InputStorage.back().m_Mem);
+ PopulateTensorWithData<int8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
+ break;
+ }
+ case armnn::DataType::QAsymmU8:
+ {
+ auto typedTensor = reinterpret_cast<uint8_t*>(m_InputStorage.back().m_Mem);
+ PopulateTensorWithData<uint8_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
+ break;
+ }
+ case armnn::DataType::Signed32:
+ {
+ auto typedTensor = reinterpret_cast<int32_t*>(m_InputStorage.back().m_Mem);
+ PopulateTensorWithData<int32_t>(typedTensor, tensorInfo.GetNumElements(), dataFile, inputName);
+ break;
+ }
+ default:
+ {
+ LogAndThrow("Unexpected DataType");
+ }
+ }
+
+ if (m_Params.m_ImportInputsIfAligned)
+ {
+ m_ImportedInputIds.push_back(
+ m_Runtime->ImportInputs(m_NetworkId, m_InputTensorsVec.back(), armnn::MemorySource::Malloc));
+ }
+ }
+ m_InputTensorsVec.emplace_back(inputTensors);
+ }
+
+ for(unsigned int outputSet = 0; outputSet < noOutputSets; ++outputSet)
+ {
+ armnn::OutputTensors outputTensors;
+ for (const auto& output: m_IOInfo.m_OutputInfoMap)
+ {
+ const armnn::BindingPointInfo& bindingPointInfo = output.second;
+ const armnn::TensorInfo& tensorInfo = bindingPointInfo.second;
+
+ m_OutputStorage.emplace_back(tensorInfo.GetNumBytes());
+ outputTensors.emplace_back(bindingPointInfo.first, armnn::Tensor{tensorInfo, m_OutputStorage.back().m_Mem});
+ }
+ m_OutputTensorsVec.emplace_back(outputTensors);
+ if (m_Params.m_ImportInputsIfAligned)
+ {
+ m_ImportedOutputIds.push_back(
+ m_Runtime->ImportOutputs(m_NetworkId, m_OutputTensorsVec.back(), armnn::MemorySource::Malloc));
+ }
+ }
+
+ // Fill the remaining iterations with copies
+ const unsigned int remainingInputSets = m_Params.m_Iterations - noInputSets;
+ for (unsigned int i = 1; i <= remainingInputSets; i++)
+ {
+ m_InputTensorsVec.push_back(m_InputTensorsVec[noInputSets % i]);
+ if (m_Params.m_ImportInputsIfAligned)
+ {
+ m_ImportedInputIds.push_back(m_ImportedInputIds[noInputSets % i]);
+ }
+ }
+
+ const unsigned int remainingOutputSets = m_Params.m_Iterations - noOutputSets;
+ for (unsigned int i = 1; i <= remainingOutputSets; i++)
+ {
+ m_OutputTensorsVec.push_back(m_OutputTensorsVec[noOutputSets % i]);
+ if (m_Params.m_ImportInputsIfAligned)
+ {
+ m_ImportedOutputIds.push_back(m_ImportedOutputIds[noOutputSets % i]);
+ }
+ }
+}
+
+ArmNNExecutor::IOInfo ArmNNExecutor::GetIOInfo(armnn::IOptimizedNetwork* optNet)
+{
+ struct IOStrategy : armnn::IStrategy
+ {
+ void ExecuteStrategy(const armnn::IConnectableLayer* layer,
+ const armnn::BaseDescriptor& descriptor,
+ const std::vector<armnn::ConstTensor>& constants,
+ const char* name,
+ const armnn::LayerBindingId id = 0) override
+ {
+ armnn::IgnoreUnused(descriptor, constants, id);
+ switch (layer->GetType())
+ {
+ case armnn::LayerType::Input:
+ {
+ m_IOInfo.m_InputNames.emplace_back(name);
+ m_IOInfo.m_InputInfoMap[name] = {id, layer->GetOutputSlot(0).GetTensorInfo()};
+ break;
+ }
+ case armnn::LayerType::Output:
+ {
+ m_IOInfo.m_OutputNames.emplace_back(name);
+ m_IOInfo.m_OutputInfoMap[name] = {id, layer->GetInputSlot(0).GetConnection()->GetTensorInfo()};
+ break;
+ }
+ default: {}
+ }
+ }
+ IOInfo m_IOInfo;
+ };
+
+ IOStrategy ioStrategy;
+ optNet->ExecuteStrategy(ioStrategy);
+
+ return ioStrategy.m_IOInfo;
+}
+
+armnn::IOptimizedNetworkPtr ArmNNExecutor::OptimizeNetwork(armnn::INetwork* network)
+{
+ armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
+
+ armnn::OptimizerOptions options;
+ options.m_ReduceFp32ToFp16 = m_Params.m_EnableFp16TurboMode;
+ options.m_ReduceFp32ToBf16 = m_Params.m_EnableBf16TurboMode;
+ options.m_Debug = m_Params.m_PrintIntermediate;
+ options.m_shapeInferenceMethod = m_Params.m_InferOutputShape ?
+ armnn::ShapeInferenceMethod::InferAndValidate :
+ armnn::ShapeInferenceMethod::ValidateOnly;
+ options.m_ProfilingEnabled = m_Params.m_EnableProfiling;
+
+ armnn::BackendOptions gpuAcc("GpuAcc",
+ {
+ { "FastMathEnabled", m_Params.m_EnableFastMath },
+ { "SaveCachedNetwork", m_Params.m_SaveCachedNetwork },
+ { "CachedNetworkFilePath", m_Params.m_CachedNetworkFilePath },
+ { "MLGOTuningFilePath", m_Params.m_MLGOTuningFilePath }
+ });
+
+ armnn::BackendOptions cpuAcc("CpuAcc",
+ {
+ { "FastMathEnabled", m_Params.m_EnableFastMath },
+ { "NumberOfThreads", m_Params.m_NumberOfThreads }
+ });
+ options.m_ModelOptions.push_back(gpuAcc);
+ options.m_ModelOptions.push_back(cpuAcc);
+
+ const auto optimization_start_time = armnn::GetTimeNow();
+ optNet = armnn::Optimize(*network, m_Params.m_ComputeDevices, m_Runtime->GetDeviceSpec(), options);
+
+ ARMNN_LOG(info) << "Optimization time: " << std::setprecision(2)
+ << std::fixed << armnn::GetTimeDuration(optimization_start_time).count() << " ms\n";
+
+ if (!optNet)
+ {
+ LogAndThrow("Optimize returned nullptr");
+ }
+
+ return optNet;
+}
+
+std::unique_ptr<ArmNNExecutor::IParser> ArmNNExecutor::CreateParser()
+{
+ // The model format is deduced from the model file name
+ const std::string& modelFormat = m_Params.m_ModelPath;
+
+ m_Params.m_IsModelBinary = modelFormat.find("json") == std::string::npos;
+ std::unique_ptr<IParser> parser = nullptr;
+ // Forward to implementation based on the parser type
+ if (modelFormat.find("armnn") != std::string::npos)
+ {
+#if defined(ARMNN_SERIALIZER)
+ parser = std::make_unique<ArmNNDeserializer>();
+#else
+ LogAndThrow("Not built with serialization support.");
+#endif
+ }
+ else if(modelFormat.find("tflite") != std::string::npos)
+ {
+#if defined(ARMNN_TF_LITE_PARSER)
+ parser = std::make_unique<TfliteParser>(m_Params);
+#else
+ LogAndThrow("Not built with Tensorflow-Lite parser support.");
+#endif
+ }
+ else if (modelFormat.find("onnx") != std::string::npos)
+ {
+#if defined(ARMNN_ONNX_PARSER)
+ parser = std::make_unique<OnnxParser>();
+#else
+ LogAndThrow("Not built with Onnx parser support.");
+#endif
+ }
+
+ return parser;
+}
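
// The parser choice above follows the model file name alone; the mapping it implies
// (file names are hypothetical examples):
//   "network.armnn"   -> ArmNNDeserializer, m_IsModelBinary = true
//   "model.tflite"    -> TfliteParser,      m_IsModelBinary = true
//   "model.onnx"      -> OnnxParser,        m_IsModelBinary = true
//   "model.onnx.json" -> OnnxParser,        m_IsModelBinary = false (text format)
// A path matching none of the substrings leaves the returned parser null.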
+
+void ArmNNExecutor::PrintOutputTensors(const armnn::OutputTensors* outputTensors,
+ unsigned int iteration)
+{
+ auto findOutputName = [&](const armnn::LayerBindingId id)
+ {
+ for (auto it = m_IOInfo.m_OutputInfoMap.begin(); it != m_IOInfo.m_OutputInfoMap.end(); ++it)
+ {
+ if (id == it->second.first)
+ {
+ return it->first;
+ }
+ }
+ return std::string{};
+ };
+
+ unsigned int outputIndex = 0;
+ unsigned int numOutputs = outputTensors->size();
+ for (const auto& output: *outputTensors)
+ {
+ const auto bindingName = findOutputName(output.first);
+ // We've already checked that the number of output files either equals numOutputs, in which
+ // case the same files are overwritten on every iteration (so only the results of the last
+ // iteration are kept), or is large enough to provide one file per output per iteration.
+ size_t outputFileIndex = iteration * numOutputs + outputIndex;
+ if (!m_Params.m_OutputTensorFiles.empty())
+ {
+ outputFileIndex = outputFileIndex % m_Params.m_OutputTensorFiles.size();
+ ARMNN_LOG(info) << "Writing output: " << bindingName << " bindingId: '"
+ << output.first
+ << "' of iteration: " << iteration + 1 << " to file: '"
+ << m_Params.m_OutputTensorFiles[outputFileIndex] << "'";
+ }
+
+ const armnn::Optional<std::string> outputTensorFile = m_Params.m_OutputTensorFiles.empty() ?
+ armnn::EmptyOptional() :
+ armnn::MakeOptional<std::string>(
+ m_Params.m_OutputTensorFiles[outputFileIndex]);
+
+ OutputWriteInfo outputWriteInfo
+ {
+ outputTensorFile,
+ bindingName,
+ output.second,
+ !m_Params.m_DontPrintOutputs
+ };
+
+ std::cout << bindingName << ": ";
+ std::vector<float> values;
+ switch (output.second.GetDataType())
+ {
+ case armnn::DataType::Float32:
+ {
+ PrintTensor<float>(outputWriteInfo, "%f ");
+ break;
+ }
+
+ case armnn::DataType::Signed32:
+ {
+ PrintTensor<int>(outputWriteInfo, "%d ");
+ break;
+ }
+ case armnn::DataType::QSymmS8:
+ case armnn::DataType::QAsymmS8:
+ {
+ PrintTensor<int8_t>(outputWriteInfo, "%d ");
+ break;
+ }
+ case armnn::DataType::QAsymmU8:
+ {
+ PrintTensor<uint8_t>(outputWriteInfo, "%d ");
+ break;
+ }
+ case armnn::DataType::Float16:
+ case armnn::DataType::QSymmS16:
+ case armnn::DataType::BFloat16:
+ case armnn::DataType::Boolean:
+ case armnn::DataType::Signed64:
+ default:
+ {
+ LogAndThrow("Unexpected DataType");
+ }
+ }
+ std::cout << "\n";
+ }
+}
+
+void ArmNNExecutor::CompareAndPrintResult(std::vector<const void*> otherOutput)
+{
+ unsigned int index = 0;
+
+ for (const auto& outputTensors: m_OutputTensorsVec)
+ {
+ for (const auto& outputTensor: outputTensors)
+ {
+ float result = 0;
+ size_t size = outputTensor.second.GetNumBytes();
+
+ switch (outputTensor.second.GetDataType())
+ {
+ case armnn::DataType::Float32:
+ {
+ result = ComputeRMSE<float>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
+ break;
+ }
+ case armnn::DataType::QSymmS16:
+ {
+ result = ComputeRMSE<int16_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
+ break;
+ }
+ case armnn::DataType::QSymmS8:
+ {
+ result = ComputeRMSE<int8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
+ break;
+ }
+ case armnn::DataType::QAsymmU8:
+ case armnn::DataType::QAsymmS8:
+ {
+ result = ComputeRMSE<uint8_t>(outputTensor.second.GetMemoryArea(), otherOutput[index++], size);
+ break;
+ }
+ default:
+ {
+ LogAndThrow("Unexpected DataType");
+ }
+ }
+ std::cout << "RMSE: of " << result << "\n";
+ }
+ }
+}
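
// ComputeRMSE comes from NetworkExecutionUtils and is not shown in this file. As an
// assumption for illustration only, a helper matching the call shape used above (raw byte
// buffers plus a byte count) could look like this sketch; the real implementation may differ.
// Requires <cmath> and <cstddef>.
template<typename T>
float ComputeRmseSketch(const void* actual, const void* expected, size_t sizeInBytes)
{
    const T* a = static_cast<const T*>(actual);
    const T* b = static_cast<const T*>(expected);
    const size_t count = sizeInBytes / sizeof(T);

    double sumSquaredError = 0.0;
    for (size_t i = 0; i < count; ++i)
    {
        const double diff = static_cast<double>(a[i]) - static_cast<double>(b[i]);
        sumSquaredError += diff * diff;
    }
    // Root of the mean squared element-wise difference
    return count == 0 ? 0.0f : static_cast<float>(std::sqrt(sumSquaredError / count));
}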
+#if defined(ARMNN_SERIALIZER)
+ArmNNExecutor::ArmNNDeserializer::ArmNNDeserializer() : m_Parser(armnnDeserializer::IDeserializer::Create()){}
+
+armnn::INetworkPtr ArmNNExecutor::ArmNNDeserializer::CreateNetwork(const ExecuteNetworkParams& params)
+{
+ const std::string& modelPath = params.m_ModelPath;
+
+ std::ifstream file(modelPath, std::ios::binary);
+ return m_Parser->CreateNetworkFromBinary(file);
+}
+
+armnn::BindingPointInfo
+ArmNNExecutor::ArmNNDeserializer::GetInputBindingPointInfo(size_t, const std::string& inputName)
+{
+ armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkInputBindingInfo(0, inputName);
+ return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
+}
+
+armnn::BindingPointInfo
+ArmNNExecutor::ArmNNDeserializer::GetOutputBindingPointInfo(size_t, const std::string& outputName)
+{
+ armnnDeserializer::BindingPointInfo DeserializerBPI = m_Parser->GetNetworkOutputBindingInfo(0, outputName);
+ return {DeserializerBPI.m_BindingId, DeserializerBPI.m_TensorInfo};
+}
+#endif
+
+#if defined(ARMNN_TF_LITE_PARSER)
+ArmNNExecutor::TfliteParser::TfliteParser(const ExecuteNetworkParams& params)
+{
+ armnnTfLiteParser::ITfLiteParser::TfLiteParserOptions options;
+ options.m_StandInLayerForUnsupported = params.m_ParseUnsupported;
+ options.m_InferAndValidate = params.m_InferOutputShape;
+
+ m_Parser = armnnTfLiteParser::ITfLiteParser::Create(options);
+}
+
+armnn::INetworkPtr ArmNNExecutor::TfliteParser::CreateNetwork(const ExecuteNetworkParams& params)
+{
+ const std::string& modelPath = params.m_ModelPath;
+ return m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str());
+}
+
+armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetInputBindingPointInfo(size_t subgraphId,
+ const std::string& inputName)
+{
+ return m_Parser->GetNetworkInputBindingInfo(subgraphId, inputName);
+}
+
+armnn::BindingPointInfo ArmNNExecutor::TfliteParser::GetOutputBindingPointInfo(size_t subgraphId,
+ const std::string& outputName)
+{
+ return m_Parser->GetNetworkOutputBindingInfo(subgraphId, outputName);
+}
+#endif
+
+
+#if defined(ARMNN_ONNX_PARSER)
+ArmNNExecutor::OnnxParser::OnnxParser() : m_Parser(armnnOnnxParser::IOnnxParser::Create()){}
+
+armnn::INetworkPtr ArmNNExecutor::OnnxParser::CreateNetwork(const ExecuteNetworkParams& params)
+{
+ const std::string& modelPath = params.m_ModelPath;
+ m_Parser = armnnOnnxParser::IOnnxParser::Create();
+ std::map<std::string, armnn::TensorShape> inputShapes;
+ if(!params.m_InputTensorShapes.empty())
+ {
+ const size_t numInputShapes = params.m_InputTensorShapes.size();
+ const size_t numInputBindings = params.m_InputNames.size();
+ if(numInputShapes < numInputBindings)
+ {
+ throw armnn::Exception(
+ fmt::format("Not every input has its tensor shape specified: expected={0}, got={1}",
+ numInputBindings, numInputShapes));
+ }
+
+ for (size_t i = 0; i < numInputShapes; i++)
+ {
+ inputShapes[params.m_InputNames[i]] = params.m_InputTensorShapes[i];
+ }
+
+ return params.m_IsModelBinary ?
+ m_Parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes) :
+ m_Parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes);
+ }
+
+ // Handle text and binary input differently by calling the corresponding parser function
+ return params.m_IsModelBinary ?
+ m_Parser->CreateNetworkFromBinaryFile(params.m_ModelPath.c_str()) :
+ m_Parser->CreateNetworkFromTextFile(params.m_ModelPath.c_str());
+}
+
+armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetInputBindingPointInfo(size_t, const std::string& inputName)
+{
+ return m_Parser->GetNetworkInputBindingInfo(inputName);
+}
+
+armnn::BindingPointInfo ArmNNExecutor::OnnxParser::GetOutputBindingPointInfo(size_t, const std::string& outputName)
+{
+ return m_Parser->GetNetworkOutputBindingInfo(outputName);
+}
+#endif