//
// Copyright © 2017 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once

#include <armnn/ArmNN.hpp>

#if defined(ARMNN_TF_LITE_PARSER)
#include <armnnTfLiteParser/ITfLiteParser.hpp>
#endif

#include <HeapProfiling.hpp>
#if defined(ARMNN_ONNX_PARSER)
#include <armnnOnnxParser/IOnnxParser.hpp>
#endif

#include <backendsCommon/BackendRegistry.hpp>

#include <boost/assert.hpp>
#include <boost/exception/exception.hpp>
#include <boost/exception/diagnostic_information.hpp>
#include <boost/format.hpp>
#include <boost/program_options.hpp>
#include <boost/filesystem.hpp>

#include <algorithm>
#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <string>
#include <vector>

namespace
{

// Checks that every requested backend id is registered with the runtime. If an optional string
// reference is supplied, the names of any unknown backends are appended to it for error reporting.
inline bool CheckRequestedBackendsAreValid(const std::vector<armnn::BackendId>& backendIds,
                                           armnn::Optional<std::string&> invalidBackendIds = armnn::EmptyOptional())
{
    if (backendIds.empty())
    {
        return false;
    }

    armnn::BackendIdSet validBackendIds = armnn::BackendRegistryInstance().GetBackendIds();

    bool allValid = true;
    for (const auto& backendId : backendIds)
    {
        if (std::find(validBackendIds.begin(), validBackendIds.end(), backendId) == validBackendIds.end())
        {
            allValid = false;
            if (invalidBackendIds)
            {
                if (!invalidBackendIds.value().empty())
                {
                    invalidBackendIds.value() += ", ";
                }
                invalidBackendIds.value() += backendId;
            }
        }
    }
    return allValid;
}

} // anonymous namespace

namespace InferenceModelInternal
{

// This needs to go when the armnnCaffeParser, armnnTfParser and armnnTfLiteParser
// definitions of BindingPointInfo are consolidated.
using BindingPointInfo = std::pair<armnn::LayerBindingId, armnn::TensorInfo>;

using QuantizationParams = std::pair<float, int32_t>;

// Parameters shared by all parser front ends: where the model lives, which tensors to bind,
// and how the network should be optimized and executed.
struct Params
{
    std::string m_ModelPath;
    std::string m_InputBinding;
    std::string m_OutputBinding;
    const armnn::TensorShape* m_InputTensorShape;
    std::vector<armnn::BackendId> m_ComputeDevice;
    bool m_EnableProfiling;
    size_t m_SubgraphId;
    bool m_IsModelBinary;
    bool m_VisualizePostOptimizationModel;
    bool m_EnableFp16TurboMode;

    Params()
        : m_InputTensorShape(nullptr)
        , m_ComputeDevice{armnn::Compute::CpuRef}
        , m_EnableProfiling(false)
        , m_SubgraphId(0)
        , m_IsModelBinary(true)
        , m_VisualizePostOptimizationModel(false)
        , m_EnableFp16TurboMode(false)
    {}
};

} // namespace InferenceModelInternal

template<typename IParser>
struct CreateNetworkImpl
{
public:
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        std::map<std::string, armnn::TensorShape> inputShapes;
        if (params.m_InputTensorShape)
        {
            inputShapes[params.m_InputBinding] = *params.m_InputTensorShape;
        }
        std::vector<std::string> requestedOutputs{ params.m_OutputBinding };

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            // Handle text and binary input differently by calling the corresponding parser function
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str(), inputShapes, requestedOutputs) :
                parser->CreateNetworkFromTextFile(modelPath.c_str(), inputShapes, requestedOutputs));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
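// Illustrative sketch only (not part of the original header): the primary template above expects a
// parser whose CreateNetworkFrom{Binary,Text}File overloads take input shapes and requested outputs,
// as the Caffe/TF parsers do. A caller might instantiate it roughly as follows; the parser type
// (armnnCaffeParser::ICaffeParser), file name and tensor names are assumptions chosen for illustration:
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "model.caffemodel";
//     params.m_InputBinding  = "data";
//     params.m_OutputBinding = "prob";
//
//     InferenceModelInternal::BindingPointInfo inputBinding;
//     InferenceModelInternal::BindingPointInfo outputBinding;
//     armnn::INetworkPtr network =
//         CreateNetworkImpl<armnnCaffeParser::ICaffeParser>::Create(params, inputBinding, outputBinding);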
#if defined(ARMNN_TF_LITE_PARSER)
template <>
struct CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>
{
public:
    using IParser = armnnTfLiteParser::ITfLiteParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = parser->CreateNetworkFromBinaryFile(modelPath.c_str());
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_SubgraphId, params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_SubgraphId, params.m_OutputBinding);
        return network;
    }
};
#endif

#if defined(ARMNN_ONNX_PARSER)
template <>
struct CreateNetworkImpl<armnnOnnxParser::IOnnxParser>
{
public:
    using IParser = armnnOnnxParser::IOnnxParser;
    using Params = InferenceModelInternal::Params;
    using BindingPointInfo = InferenceModelInternal::BindingPointInfo;

    static armnn::INetworkPtr Create(const Params& params,
                                     BindingPointInfo& inputBindings,
                                     BindingPointInfo& outputBindings)
    {
        const std::string& modelPath = params.m_ModelPath;

        // Create a network from a file on disk
        auto parser(IParser::Create());

        armnn::INetworkPtr network{nullptr, [](armnn::INetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Parsing");
            network = (params.m_IsModelBinary ?
                parser->CreateNetworkFromBinaryFile(modelPath.c_str()) :
                parser->CreateNetworkFromTextFile(modelPath.c_str()));
        }

        inputBindings  = parser->GetNetworkInputBindingInfo(params.m_InputBinding);
        outputBindings = parser->GetNetworkOutputBindingInfo(params.m_OutputBinding);
        return network;
    }
};
#endif
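// Illustrative sketch only (assumption, not from the original header): the TfLite and ONNX
// specialisations above load the whole model from a single file and look bindings up by name;
// the TfLite path additionally selects the subgraph given by m_SubgraphId. For example
// (file name and tensor names are hypothetical):
//
//     InferenceModelInternal::Params params;
//     params.m_ModelPath     = "model.tflite";
//     params.m_InputBinding  = "input";
//     params.m_OutputBinding = "output";
//     params.m_SubgraphId    = 0;
//
//     InferenceModelInternal::BindingPointInfo in, out;
//     armnn::INetworkPtr network =
//         CreateNetworkImpl<armnnTfLiteParser::ITfLiteParser>::Create(params, in, out);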
template<typename TContainer>
inline armnn::InputTensors MakeInputTensors(const InferenceModelInternal::BindingPointInfo& input,
                                            const TContainer& inputTensorData)
{
    if (inputTensorData.size() != input.second.GetNumElements())
    {
        try
        {
            throw armnn::Exception(boost::str(boost::format("Input tensor has incorrect size. Expected %1% elements "
                                                            "but got %2%.")
                                              % input.second.GetNumElements()
                                              % inputTensorData.size()));
        }
        catch (const boost::exception& e)
        {
            // Coverity fix: it should not be possible to get here, but boost::str and boost::format can both
            // throw uncaught exceptions; convert them to armnn exceptions and rethrow.
            throw armnn::Exception(diagnostic_information(e));
        }
    }
    return { { input.first, armnn::ConstTensor(input.second, inputTensorData.data()) } };
}

template<typename TContainer>
inline armnn::OutputTensors MakeOutputTensors(const InferenceModelInternal::BindingPointInfo& output,
                                              TContainer& outputTensorData)
{
    if (outputTensorData.size() != output.second.GetNumElements())
    {
        throw armnn::Exception("Output tensor has incorrect size");
    }
    return { { output.first, armnn::Tensor(output.second, outputTensorData.data()) } };
}
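// Illustrative sketch only (assumption, not from the original header): these helpers wrap a
// contiguous container in an armnn::ConstTensor/armnn::Tensor keyed by its binding id. The
// container only needs data() and size(), and its element count must match the bound TensorInfo:
//
//     std::vector<float> inputData(inputBinding.second.GetNumElements());
//     std::vector<float> outputData(outputBinding.second.GetNumElements());
//     armnn::InputTensors  inputTensors  = MakeInputTensors(inputBinding, inputData);
//     armnn::OutputTensors outputTensors = MakeOutputTensors(outputBinding, outputData);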
// Owns the ArmNN runtime, parses and optimizes the network described by Params, loads it onto the
// requested backends and exposes a simple Run() interface for a single input/output tensor pair.
template<typename IParser, typename TDataType>
class InferenceModel
{
public:
    using DataType = TDataType;
    using Params = InferenceModelInternal::Params;

    struct CommandLineOptions
    {
        std::string m_ModelDir;
        std::vector<armnn::BackendId> m_ComputeDevice;
        bool m_VisualizePostOptimizationModel;
        bool m_EnableFp16TurboMode;
    };

    static void AddCommandLineOptions(boost::program_options::options_description& desc, CommandLineOptions& options)
    {
        namespace po = boost::program_options;

        std::vector<armnn::BackendId> defaultBackends = {armnn::Compute::CpuAcc, armnn::Compute::CpuRef};

        const std::string backendsMessage = "Which device to run layers on by default. Possible choices: "
                                          + armnn::BackendRegistryInstance().GetBackendIdsAsString();

        desc.add_options()
            ("model-dir,m", po::value<std::string>(&options.m_ModelDir)->required(),
                "Path to directory containing model files (.caffemodel/.prototxt/.tflite)")
            ("compute,c", po::value<std::vector<armnn::BackendId>>(&options.m_ComputeDevice)
                ->default_value(defaultBackends), backendsMessage.c_str())
            ("visualize-optimized-model,v",
                po::value<bool>(&options.m_VisualizePostOptimizationModel)->default_value(false),
                "Produce a dot file useful for visualizing the graph post optimization. "
                "The file will have the same name as the model with the .dot extension.")
            ("fp16-turbo-mode", po::value<bool>(&options.m_EnableFp16TurboMode)->default_value(false),
                "If this option is enabled, FP32 layers, weights and biases will be converted "
                "to FP16 where the backend supports it.");
    }

    InferenceModel(const Params& params, const std::shared_ptr<armnn::IRuntime>& runtime = nullptr)
        : m_EnableProfiling(params.m_EnableProfiling)
    {
        if (runtime)
        {
            m_Runtime = runtime;
        }
        else
        {
            armnn::IRuntime::CreationOptions options;
            options.m_EnableGpuProfiling = m_EnableProfiling;
            m_Runtime = std::move(armnn::IRuntime::Create(options));
        }

        std::string invalidBackends;
        if (!CheckRequestedBackendsAreValid(params.m_ComputeDevice, armnn::Optional<std::string&>(invalidBackends)))
        {
            throw armnn::Exception("Some backend IDs are invalid: " + invalidBackends);
        }

        armnn::INetworkPtr network =
            CreateNetworkImpl<IParser>::Create(params, m_InputBindingInfo, m_OutputBindingInfo);

        armnn::IOptimizedNetworkPtr optNet{nullptr, [](armnn::IOptimizedNetwork*){}};
        {
            ARMNN_SCOPED_HEAP_PROFILING("Optimizing");

            armnn::OptimizerOptions options;
            options.m_ReduceFp32ToFp16 = params.m_EnableFp16TurboMode;

            optNet = armnn::Optimize(*network, params.m_ComputeDevice, m_Runtime->GetDeviceSpec(), options);
            if (!optNet)
            {
                throw armnn::Exception("Optimize returned nullptr");
            }
        }

        if (params.m_VisualizePostOptimizationModel)
        {
            boost::filesystem::path filename = params.m_ModelPath;
            filename.replace_extension("dot");
            std::fstream file(filename.c_str(), std::fstream::out);
            optNet->SerializeToDot(file);
        }

        armnn::Status ret;
        {
            ARMNN_SCOPED_HEAP_PROFILING("LoadNetwork");
            ret = m_Runtime->LoadNetwork(m_NetworkIdentifier, std::move(optNet));
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::LoadNetwork failed");
        }
    }

    unsigned int GetOutputSize() const
    {
        return m_OutputBindingInfo.second.GetNumElements();
    }

    void Run(const std::vector<TDataType>& input, std::vector<TDataType>& output)
    {
        BOOST_ASSERT(output.size() == GetOutputSize());

        std::shared_ptr<armnn::IProfiler> profiler = m_Runtime->GetProfiler(m_NetworkIdentifier);
        if (profiler)
        {
            profiler->EnableProfiling(m_EnableProfiling);
        }

        armnn::Status ret = m_Runtime->EnqueueWorkload(m_NetworkIdentifier,
                                                       MakeInputTensors(input),
                                                       MakeOutputTensors(output));

        // If profiling is enabled print out the results
        if (profiler && profiler->IsProfilingEnabled())
        {
            profiler->Print(std::cout);
        }

        if (ret == armnn::Status::Failure)
        {
            throw armnn::Exception("IRuntime::EnqueueWorkload failed");
        }
    }

    const InferenceModelInternal::BindingPointInfo& GetInputBindingInfo() const
    {
        return m_InputBindingInfo;
    }

    const InferenceModelInternal::BindingPointInfo& GetOutputBindingInfo() const
    {
        return m_OutputBindingInfo;
    }

    InferenceModelInternal::QuantizationParams GetQuantizationParams() const
    {
        return std::make_pair(m_OutputBindingInfo.second.GetQuantizationScale(),
                              m_OutputBindingInfo.second.GetQuantizationOffset());
    }

private:
    armnn::NetworkId m_NetworkIdentifier;
    std::shared_ptr<armnn::IRuntime> m_Runtime;

    InferenceModelInternal::BindingPointInfo m_InputBindingInfo;
    InferenceModelInternal::BindingPointInfo m_OutputBindingInfo;
    bool m_EnableProfiling;

    template<typename TContainer>
    armnn::InputTensors MakeInputTensors(const TContainer& inputTensorData)
    {
        return ::MakeInputTensors(m_InputBindingInfo, inputTensorData);
    }

    template<typename TContainer>
    armnn::OutputTensors MakeOutputTensors(TContainer& outputTensorData)
    {
        return ::MakeOutputTensors(m_OutputBindingInfo, outputTensorData);
    }
};
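// Illustrative end-to-end sketch only (assumption, not from the original header): a test harness
// typically drives InferenceModel roughly as follows. The parser type, model file name, tensor
// names and backend list are placeholders chosen for illustration.
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float>::Params params;
//     params.m_ModelPath     = "model.tflite";
//     params.m_InputBinding  = "input";
//     params.m_OutputBinding = "output";
//     params.m_ComputeDevice = { armnn::Compute::CpuAcc, armnn::Compute::CpuRef };
//
//     InferenceModel<armnnTfLiteParser::ITfLiteParser, float> model(params);
//
//     std::vector<float> input(model.GetInputBindingInfo().second.GetNumElements());
//     std::vector<float> output(model.GetOutputSize());
//     model.Run(input, output);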