//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include "Types.hpp"

#include "armnn/ArmNN.hpp"
#include <armnn/Logging.hpp>
#include <armnn_delegate.hpp>
#include <DelegateOptions.hpp>

#include <tensorflow/lite/c/common.h>
#include <tensorflow/lite/interpreter.h>
#include <tensorflow/lite/kernels/register.h>
#include <tensorflow/lite/model.h>

#include <cassert>
#include <cstring>
#include <memory>
#include <string>
#include <vector>

namespace common
{
/**
* @brief Used to load a network through the TfLite Interpreter, register the
*        Arm NN delegate with it, and run inference on it against a given backend.
*        Currently it is assumed that the input data will be a cv::Mat (frame);
*        this assumption is implemented in the PrepareTensors method and can be
*        generalized later.
*
*/
template <typename Tout>
class ArmnnNetworkExecutor
{
private:
    std::unique_ptr<tflite::Interpreter> m_interpreter;
    std::unique_ptr<tflite::FlatBufferModel> m_model;
    Profiling m_profiling;

    void PrepareTensors(const void* inputData, const size_t dataBytes);

    template <typename Enumeration>
    auto log_as_int(Enumeration value)
    -> typename std::underlying_type<Enumeration>::type
    {
        return static_cast<typename std::underlying_type<Enumeration>::type>(value);
    }

public:
    ArmnnNetworkExecutor() = delete;

    /**
    * @brief Initializes the network executor with the given model and preferred backends.
    *
    * @param[in] modelPath - Relative path to the model file
    * @param[in] backends  - The list of preferred backends to run inference on
    */
    ArmnnNetworkExecutor(std::string& modelPath,
                         std::vector<armnn::BackendId>& backends,
                         bool isProfilingEnabled = false);

    /**
    * @brief Returns the aspect ratio of the associated model in the order of width, height.
    */
    Size GetImageAspectRatio();

    /**
    * @brief Returns the data type of the associated model.
    */
    armnn::DataType GetInputDataType() const;

    float GetQuantizationScale();

    int GetQuantizationOffset();

    float GetOutputQuantizationScale(int tensorIndex);

    int GetOutputQuantizationOffset(int tensorIndex);

    /**
    * @brief Runs inference on the provided input data, and stores the results
    *        in the provided InferenceResults object.
    *
    * @param[in]  inputData  - input frame data
    * @param[in]  dataBytes  - input data size in bytes
    * @param[out] outResults - Vector of InferenceResult objects used to store the output results.
    */
    bool Run(const void* inputData, const size_t dataBytes, InferenceResults<Tout>& outResults);
};

template <typename Tout>
ArmnnNetworkExecutor<Tout>::ArmnnNetworkExecutor(std::string& modelPath,
                                                 std::vector<armnn::BackendId>& preferredBackends,
                                                 bool isProfilingEnabled):
                                                 m_profiling(isProfilingEnabled)
{
    m_profiling.ProfilingStart();
    armnn::OptimizerOptions optimizerOptions;
    m_model = tflite::FlatBufferModel::BuildFromFile(modelPath.c_str());
    if (m_model == nullptr)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to build the model"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Loading the model took");

    m_profiling.ProfilingStart();
    tflite::ops::builtin::BuiltinOpResolver resolver;
    tflite::InterpreterBuilder(*m_model, resolver)(&m_interpreter);
    if (m_interpreter->AllocateTensors() != kTfLiteOk)
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Failed to alloc tensors"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
    m_profiling.ProfilingStopAndPrintUs("Create the tflite interpreter");

    /* create delegate options */
    m_profiling.ProfilingStart();

    /* enable fast math optimization */
    armnn::BackendOptions modelOptionGpu("GpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionGpu);

    armnn::BackendOptions modelOptionCpu("CpuAcc", {{"FastMathEnabled", true}});
    optimizerOptions.m_ModelOptions.push_back(modelOptionCpu);

    /* enable reduce float32 to float16 optimization */
    optimizerOptions.m_ReduceFp32ToFp16 = true;

    armnnDelegate::DelegateOptions delegateOptions(preferredBackends, optimizerOptions);

    /* create delegate object */
    std::unique_ptr<TfLiteDelegate, decltype(&armnnDelegate::TfLiteArmnnDelegateDelete)>
            theArmnnDelegate(armnnDelegate::TfLiteArmnnDelegateCreate(delegateOptions),
                             armnnDelegate::TfLiteArmnnDelegateDelete);

    /* Register the delegate with the interpreter */
    m_interpreter->ModifyGraphWithDelegate(std::move(theArmnnDelegate));
    m_profiling.ProfilingStopAndPrintUs("Create and load ArmNN Delegate");
}

template <typename Tout>
void ArmnnNetworkExecutor<Tout>::PrepareTensors(const void* inputData, const size_t dataBytes)
{
    size_t inputTensorSize = m_interpreter->input_tensor(0)->bytes;
    auto* inputTensorPtr = m_interpreter->input_tensor(0)->data.raw;
    assert(inputTensorSize >= dataBytes);
    if (inputTensorPtr != nullptr)
    {
        memcpy(inputTensorPtr, inputData, inputTensorSize);
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: input tensor is null"};
        ARMNN_LOG(error) << errorMessage;
        throw armnn::Exception(errorMessage);
    }
}

template <typename Tout>
bool ArmnnNetworkExecutor<Tout>::Run(const void* inputData, const size_t dataBytes,
                                     InferenceResults<Tout>& outResults)
{
    bool ret = false;
    m_profiling.ProfilingStart();
    PrepareTensors(inputData, dataBytes);

    if (m_interpreter->Invoke() == kTfLiteOk)
    {
        ret = true;
        // Extract the output tensor data.
        outResults.clear();
        outResults.reserve(m_interpreter->outputs().size());
        for (int index = 0; index < m_interpreter->outputs().size(); index++)
        {
            size_t size = m_interpreter->output_tensor(index)->bytes / sizeof(Tout);
            const Tout* p_Output = m_interpreter->typed_output_tensor<Tout>(index);
            if (p_Output != nullptr)
            {
                InferenceResult<Tout> outRes(p_Output, p_Output + size);
                outResults.emplace_back(outRes);
            }
            else
            {
                const std::string errorMessage{"ArmnnNetworkExecutor: p_Output tensor is null"};
                ARMNN_LOG(error) << errorMessage;
                ret = false;
            }
        }
    }
    else
    {
        const std::string errorMessage{"ArmnnNetworkExecutor: Invoke has failed"};
        ARMNN_LOG(error) << errorMessage;
    }

    m_profiling.ProfilingStopAndPrintUs("Perform inference");
    return ret;
}

template <typename Tout>
Size ArmnnNetworkExecutor<Tout>::GetImageAspectRatio()
{
    assert(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->size == 4);
    return Size(m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[2],
                m_interpreter->tensor(m_interpreter->inputs()[0])->dims->data[1]);
}

template <typename Tout>
armnn::DataType ArmnnNetworkExecutor<Tout>::GetInputDataType() const
{
    return GetDataType(*(m_interpreter->tensor(m_interpreter->inputs()[0])));
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetQuantizationScale()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetQuantizationOffset()
{
    return m_interpreter->tensor(m_interpreter->inputs()[0])->params.zero_point;
}

template <typename Tout>
float ArmnnNetworkExecutor<Tout>::GetOutputQuantizationScale(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.scale;
}

template <typename Tout>
int ArmnnNetworkExecutor<Tout>::GetOutputQuantizationOffset(int tensorIndex)
{
    assert(m_interpreter->outputs().size() > tensorIndex);
    return m_interpreter->tensor(m_interpreter->outputs()[tensorIndex])->params.zero_point;
}

} // namespace common
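// ----------------------------------------------------------------------------
// Minimal usage sketch (illustrative only, kept as a comment so it does not
// affect this header). It assumes a float output type, a hypothetical model
// path ("model.tflite"), the GpuAcc/CpuAcc/CpuRef backends, that the
// InferenceResults/Size helpers come from Types.hpp in namespace common, and a
// hypothetical LoadAndPreprocessFrame() helper that returns a cv::Mat already
// resized and converted to the model's input shape and data type.
//
//     #include "ArmnnNetworkExecutor.hpp"
//
//     int main()
//     {
//         std::string modelPath = "model.tflite";   // hypothetical path
//         std::vector<armnn::BackendId> backends = {"GpuAcc", "CpuAcc", "CpuRef"};
//
//         // Loads the model, builds the TfLite interpreter and registers the
//         // Arm NN delegate for the preferred backends.
//         common::ArmnnNetworkExecutor<float> executor(modelPath, backends,
//                                                      /*isProfilingEnabled=*/true);
//
//         // LoadAndPreprocessFrame() is a hypothetical application helper.
//         cv::Mat frame = LoadAndPreprocessFrame("input.png", executor.GetImageAspectRatio());
//
//         common::InferenceResults<float> results;
//         if (executor.Run(frame.data, frame.total() * frame.elemSize(), results))
//         {
//             // results[i] holds the flattened data of output tensor i.
//         }
//         return 0;
//     }
// ----------------------------------------------------------------------------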