// // Copyright © 2017 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "ArmnnDriver.hpp" #include "ArmnnDriverImpl.hpp" #include "RequestThread.hpp" #include "ModelToINetworkConverter.hpp" #include #include #include #include namespace armnn_driver { using CallbackAsync_1_2 = std::function< void(V1_0::ErrorStatus errorStatus, std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, const ::android::hardware::neuralnetworks::V1_2::Timing& timing, std::string callingFunction)>; struct ExecutionContext_1_2 { ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; TimePoint driverStart; }; using CallbackContext_1_2 = CallbackContext; template class ArmnnPreparedModel_1_2 : public V1_2::IPreparedModel { public: using HalModel = typename V1_2::Model; ArmnnPreparedModel_1_2(armnn::NetworkId networkId, armnn::IRuntime* runtime, const HalModel& model, const std::string& requestInputsAndOutputsDumpDir, const bool gpuProfilingEnabled, const bool asyncModelExecutionEnabled = false); virtual ~ArmnnPreparedModel_1_2(); virtual Return execute(const V1_0::Request& request, const ::android::sp& callback) override; virtual Return execute_1_2(const V1_0::Request& request, V1_2::MeasureTiming measure, const ::android::sp& callback) override; virtual Return executeSynchronously(const V1_0::Request &request, V1_2::MeasureTiming measure, V1_2::IPreparedModel::executeSynchronously_cb cb) override; virtual Return configureExecutionBurst( const ::android::sp& callback, const android::hardware::MQDescriptorSync& requestChannel, const android::hardware::MQDescriptorSync& resultChannel, configureExecutionBurst_cb cb) override; /// execute the graph prepared from the request template bool ExecuteGraph(std::shared_ptr>& pMemPools, armnn::InputTensors& inputTensors, armnn::OutputTensors& outputTensors, CallbackContext callback); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(); private: template class ArmnnThreadPoolCallback_1_2 : public armnn::IAsyncExecutionCallback { public: ArmnnThreadPoolCallback_1_2(ArmnnPreparedModel_1_2* model, std::shared_ptr>& pMemPools, std::vector outputShapes, std::shared_ptr& inputTensors, std::shared_ptr& outputTensors, CallbackContext callbackContext) : m_Model(model), m_MemPools(pMemPools), m_OutputShapes(outputShapes), m_InputTensors(inputTensors), m_OutputTensors(outputTensors), m_CallbackContext(callbackContext) {} void Notify(armnn::Status status, armnn::InferenceTimingPair timeTaken) override; // Retrieve the Arm NN Status from the AsyncExecutionCallback that has been notified virtual armnn::Status GetStatus() const override { return armnn::Status::Success; } // Block the calling thread until the AsyncExecutionCallback object allows it to proceed virtual void Wait() const override {} // Retrieve the start time before executing the inference virtual armnn::HighResolutionClock GetStartTime() const override { return std::chrono::high_resolution_clock::now(); } // Retrieve the time after executing the inference virtual armnn::HighResolutionClock GetEndTime() const override { return std::chrono::high_resolution_clock::now(); } ArmnnPreparedModel_1_2* m_Model; std::shared_ptr> m_MemPools; std::vector m_OutputShapes; std::shared_ptr m_InputTensors; std::shared_ptr m_OutputTensors; CallbackContext m_CallbackContext; }; Return Execute(const V1_0::Request& request, V1_2::MeasureTiming measureTiming, CallbackAsync_1_2 callback); Return PrepareMemoryForInputs( armnn::InputTensors& inputs, const V1_0::Request& request, const std::vector& memPools); Return PrepareMemoryForOutputs( armnn::OutputTensors& outputs, std::vector &outputShapes, const V1_0::Request& request, const std::vector& memPools); Return PrepareMemoryForIO( armnn::InputTensors& inputs, armnn::OutputTensors& outputs, std::vector& memPools, const V1_0::Request& request, CallbackAsync_1_2 callback); template void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); /// schedule the graph prepared from the request for execution template void ScheduleGraphForExecution( std::shared_ptr>& pMemPools, std::shared_ptr& inputTensors, std::shared_ptr& outputTensors, CallbackContext m_CallbackContext); armnn::NetworkId m_NetworkId; armnn::IRuntime* m_Runtime; V1_2::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here static RequestThread m_RequestThread; uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; std::unique_ptr m_WorkingMemHandle; const bool m_AsyncModelExecutionEnabled; }; }