// // Copyright © 2020 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "ArmnnDriver.hpp" #include "ArmnnDriverImpl.hpp" #include "RequestThread_1_3.hpp" #include "ModelToINetworkConverter.hpp" #include #include #include #include #include namespace armnn_driver { using CallbackAsync_1_3 = std::function< void(V1_3::ErrorStatus errorStatus, std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, const ::android::hardware::neuralnetworks::V1_2::Timing& timing, std::string callingFunction)>; struct ExecutionContext_1_3 { ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; TimePoint driverStart; TimePoint driverEnd; TimePoint deviceStart; TimePoint deviceEnd; }; using CallbackContext_1_3 = CallbackContext; using executeFenced_cb = std::function& callback)>; template class ArmnnPreparedModel_1_3 : public V1_3::IPreparedModel { public: using HalModel = typename V1_3::Model; ArmnnPreparedModel_1_3(armnn::NetworkId networkId, armnn::IRuntime* runtime, const HalModel& model, const std::string& requestInputsAndOutputsDumpDir, const bool gpuProfilingEnabled, V1_3::Priority priority = V1_3::Priority::MEDIUM, const bool asyncModelExecutionEnabled = false, const unsigned int numberOfThreads = 1, const bool importEnabled = false, const bool exportEnabled = false); ArmnnPreparedModel_1_3(armnn::NetworkId networkId, armnn::IRuntime* runtime, const std::string& requestInputsAndOutputsDumpDir, const bool gpuProfilingEnabled, V1_3::Priority priority = V1_3::Priority::MEDIUM, const bool asyncModelExecutionEnabled = false, const unsigned int numberOfThreads = 1, const bool importEnabled = false, const bool exportEnabled = false, const bool preparedFromCache = false); virtual ~ArmnnPreparedModel_1_3(); Return execute(const V1_0::Request& request, const ::android::sp& callback) override; Return execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure, const ::android::sp& callback) override; Return execute_1_3(const V1_3::Request& request, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&, const V1_3::OptionalTimeoutDuration&, const ::android::sp& callback) override; Return executeSynchronously(const V1_0::Request &request, V1_2::MeasureTiming measure, V1_3::IPreparedModel::executeSynchronously_cb cb) override; Return executeSynchronously_1_3(const V1_3::Request &request, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint& deadline, const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override; Return executeFenced(const V1_3::Request& request, const android::hardware::hidl_vec& fenceWaitFor, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint& deadline, const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, const V1_3::OptionalTimeoutDuration& duration, executeFenced_cb callback) override; Return configureExecutionBurst( const ::android::sp& callback, const android::hardware::MQDescriptorSync& requestChannel, const android::hardware::MQDescriptorSync& resultChannel, configureExecutionBurst_cb cb) override; template Return ExecuteSynchronously(const V1_3::Request& request, CallbackContext cbCtx); /// execute the graph prepared from the request template Return ExecuteGraph( std::shared_ptr>& pMemPools, armnn::InputTensors& inputTensors, armnn::OutputTensors& outputTensors, CallbackContext callback); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(unsigned int numInputs, unsigned int numOutputs); V1_3::Priority GetModelPriority(); private: template class ArmnnThreadPoolCallback_1_3 : public armnn::IAsyncExecutionCallback { public: ArmnnThreadPoolCallback_1_3(ArmnnPreparedModel_1_3* model, std::shared_ptr>& pMemPools, std::vector outputShapes, std::shared_ptr& inputTensors, std::shared_ptr& outputTensors, CallbackContext callbackContext) : m_Model(model), m_MemPools(pMemPools), m_OutputShapes(outputShapes), m_InputTensors(inputTensors), m_OutputTensors(outputTensors), m_CallbackContext(callbackContext) {} void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override; ArmnnPreparedModel_1_3* m_Model; std::shared_ptr> m_MemPools; std::vector m_OutputShapes; std::shared_ptr m_InputTensors; std::shared_ptr m_OutputTensors; CallbackContext m_CallbackContext; }; Return Execute(const V1_3::Request& request, V1_2::MeasureTiming measureTiming, CallbackAsync_1_3 callback); Return PrepareMemoryForInputs( armnn::InputTensors& inputs, const V1_3::Request& request, const std::vector& memPools); Return PrepareMemoryForOutputs( armnn::OutputTensors& outputs, std::vector &outputShapes, const V1_3::Request& request, const std::vector& memPools); std::tuple, V1_2::Timing, std::string> PrepareMemoryForIO( armnn::InputTensors& inputs, armnn::OutputTensors& outputs, std::vector& memPools, const V1_3::Request& request); template void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); /// schedule the graph prepared from the request for execution template void ScheduleGraphForExecution( std::shared_ptr>& pMemPools, std::shared_ptr& inputTensors, std::shared_ptr& outputTensors, CallbackContext m_CallbackContext, armnn::QosExecPriority priority); armnn::NetworkId m_NetworkId; armnn::IRuntime* m_Runtime; V1_3::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here static RequestThread_1_3 m_RequestThread; uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; V1_3::Priority m_ModelPriority; // Static to allow sharing of threadpool between ArmnnPreparedModel instances static std::unique_ptr m_Threadpool; std::shared_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; const bool m_EnableImport; const bool m_EnableExport; const bool m_PreparedFromCache; }; }