//
// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include "armnnTfLiteParser/ITfLiteParser.hpp"
#include "armnnUtils/DataLayoutIndexed.hpp"

#include <armnn/Logging.hpp>

#include <cassert>
#include <sstream>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Used to load in a network through ArmNN and run inference on it against a given backend.
*
*/
template <class Tout>
class ArmnnNetworkExecutor
{
private:
    armnn::IRuntimePtr m_Runtime;
    armnn::NetworkId m_NetId{};
    mutable InferenceResults<Tout> m_OutputBuffer;
    armnn::InputTensors m_InputTensors;
    armnn::OutputTensors m_OutputTensors;
    std::vector<armnnTfLiteParser::BindingPointInfo> m_outputBindingInfo;

    std::vector<std::string> m_outputLayerNamesList;

    armnnTfLiteParser::BindingPointInfo m_inputBindingInfo;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Initializes the network. The model is parsed through the TfLite parser and optimized for a
    *        given backend.
    *
    * Note that the order of the output layer names in m_outputLayerNamesList affects the order of the
    * feature vectors in the output of the Run method.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends  - The list of preferred backends to run inference on
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends);

    /**
    * @brief Returns the aspect ratio of the associated model in the order of width, height.
    */
    Size GetImageAspectRatio();

    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    /**
    * @brief Runs inference on the provided input data and stores the results in the provided
    *        InferenceResults object.
    *
    * @param[in]  inputData  - input frame data
    * @param[in]  dataBytes  - input data size in bytes
    * @param[out] outResults - Vector of output vectors used to store the inference results
    */
    bool Run(const void* inputData, const size_t dataBytes, common::InferenceResults<Tout>& outResults);

};

template <class Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends)
: m_Runtime(armnn::IRuntime::Create(armnn::IRuntime::CreationOptions()))
{
    // Import the TensorFlow Lite model.
    armnnTfLiteParser::ITfLiteParserPtr parser = armnnTfLiteParser::ITfLiteParser::Create();
    armnn::INetworkPtr network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());

    std::vector<std::string> inputNames = parser->GetSubgraphInputTensorNames(0);

    m_inputBindingInfo = parser->GetNetworkInputBindingInfo(0, inputNames[0]);

    m_outputLayerNamesList = parser->GetSubgraphOutputTensorNames(0);

    std::vector<armnnTfLiteParser::BindingPointInfo> outputBindings;
    for (const std::string& name : m_outputLayerNamesList)
    {
        m_outputBindingInfo.push_back(std::move(parser->GetNetworkOutputBindingInfo(0, name)));
    }

    std::vector<std::string> errorMessages;
    // Optimize the network.
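    // armnn::Optimize() assigns each layer to the first backend in 'preferredBackends' that supports it
    // and applies graph-level optimizations. Any warnings or errors raised during this step are appended
    // to 'errorMessages' through the armnn::Optional reference passed below.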
    armnn::IOptimizedNetworkPtr optNet = Optimize(*network,
                                                  preferredBackends,
                                                  m_Runtime->GetDeviceSpec(),
                                                  armnn::OptimizerOptions(),
                                                  armnn::Optional<std::vector<std::string>&>(errorMessages));

    if (!optNet)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to optimize network"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Load the optimized network onto the m_Runtime device.
    std::string errorMessage;
    if (armnn::Status::Success != m_Runtime->LoadNetwork(m_NetId, std::move(optNet), errorMessage))
    {
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }

    // Pre-allocate memory for the outputs (their sizes never change).
    for (size_t it = 0; it < m_outputLayerNamesList.size(); ++it)
    {
        const armnn::TensorShape& tensorShape = m_outputBindingInfo[it].second.GetShape();

        std::vector<Tout> oneLayerOutResult;
        oneLayerOutResult.resize(tensorShape.GetNumElements(), 0);
        m_OutputBuffer.emplace_back(oneLayerOutResult);
    }

    // Make ArmNN output tensors, one per output layer, backed by the pre-allocated buffers.
    m_OutputTensors.reserve(m_OutputBuffer.size());
    for (size_t it = 0; it < m_OutputBuffer.size(); ++it)
    {
        m_OutputTensors.emplace_back(std::make_pair(
                m_outputBindingInfo[it].first,
                armnn::Tensor(m_outputBindingInfo[it].second,
                              m_OutputBuffer.at(it).data())
        ));
    }
}

template <class Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return m_inputBindingInfo.second.GetDataType();
}

template <class Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    assert(m_inputBindingInfo.second.GetNumBytes() >= dataBytes);
    m_InputTensors.clear();
    m_InputTensors = {{ m_inputBindingInfo.first, armnn::ConstTensor(m_inputBindingInfo.second, inputData)}};
}

template <class Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    /* Prepare tensors if they are not ready. */
    ARMNN_LOG(debug) << "Preparing tensors...";
    this->PrepareTensors(inputData, dataBytes);

    ARMNN_LOG(trace) << "Running inference...";

    armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetId, m_InputTensors, m_OutputTensors);

    std::stringstream inferenceFinished;
    inferenceFinished << "Inference finished with code {" << log_as_int(ret) << "}\n";

    ARMNN_LOG(trace) << inferenceFinished.str();

    if (ret == armnn::Status::Failure)
    {
        ARMNN_LOG(error) << "Failed to perform inference.";
    }

    outResults.reserve(m_outputLayerNamesList.size());
    outResults = m_OutputBuffer;

    return (armnn::Status::Success == ret);
}

template <class Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return this->m_inputBindingInfo.second.GetQuantizationScale();
}

template <class Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return this->m_inputBindingInfo.second.GetQuantizationOffset();
}

template <class Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    const auto shape = m_inputBindingInfo.second.GetShape();
    assert(shape.GetNumDimensions() == 4);
    armnnUtils::DataLayoutIndexed nhwc(armnn::DataLayout::NHWC);
    return Size(shape[nhwc.GetWidthIndex()],
                shape[nhwc.GetHeightIndex()]);
}

} // namespace common
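
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only; not part of this header). The model path,
// backend list, and the capturedFrame/PreprocessFrame() names below are
// assumptions made for the example, not symbols provided by this file.
//
//   std::string modelPath = "path/to/model.tflite";
//   std::vector<armnn::BackendId> backends = {"CpuAcc", "CpuRef"};
//   common::ArmnnNetworkExecutor<float> executor(modelPath, backends);
//
//   // Resize/convert the captured frame to the model's expected width/height.
//   common::Size inputSize = executor.GetImageAspectRatio();
//   std::vector<float> inputData = PreprocessFrame(capturedFrame, inputSize);  // hypothetical helper
//
//   common::InferenceResults<float> results;
//   if (executor.Run(inputData.data(), inputData.size() * sizeof(float), results))
//   {
//       // results[i] contains the flattened output of the i-th output layer,
//       // in the order given by the parser's subgraph output tensor names.
//   }
// ---------------------------------------------------------------------------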