// // Copyright © 2020 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include "ArmnnDriver.hpp" #include "ArmnnDriverImpl.hpp" #include "RequestThread.hpp" #include "ModelToINetworkConverter.hpp" #include #include #include #include namespace armnn_driver { using CallbackAsync_1_3 = std::function< void(V1_3::ErrorStatus errorStatus, std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, const ::android::hardware::neuralnetworks::V1_2::Timing& timing, std::string callingFunction)>; struct ExecutionContext_1_3 { ::android::hardware::neuralnetworks::V1_2::MeasureTiming measureTimings = ::android::hardware::neuralnetworks::V1_2::MeasureTiming::NO; TimePoint driverStart; }; using CallbackContext_1_3 = CallbackContext; using executeFenced_cb = std::function& callback)>; template class ArmnnPreparedModel_1_3 : public V1_3::IPreparedModel { public: using HalModel = typename V1_3::Model; ArmnnPreparedModel_1_3(armnn::NetworkId networkId, armnn::IRuntime* runtime, const HalModel& model, const std::string& requestInputsAndOutputsDumpDir, const bool gpuProfilingEnabled); virtual ~ArmnnPreparedModel_1_3(); Return execute(const V1_0::Request& request, const sp& callback) override; Return execute_1_2(const V1_0::Request& request, MeasureTiming measure, const sp& callback) override; Return execute_1_3(const V1_3::Request& request, V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint&, const sp& callback) override; Return executeSynchronously(const V1_0::Request &request, MeasureTiming measure, V1_3::IPreparedModel::executeSynchronously_cb cb) override; Return executeSynchronously_1_3(const V1_3::Request &request, MeasureTiming measure, const V1_3::OptionalTimePoint& deadline, V1_3::IPreparedModel::executeSynchronously_1_3_cb cb) override; Return executeFenced(const V1_3::Request& request, const android::hardware::hidl_vec& wait_for, MeasureTiming measure, const V1_3::OptionalTimePoint& deadline, const V1_3::OptionalTimeoutDuration& duration, executeFenced_cb callback) override; Return configureExecutionBurst( const sp& callback, const android::hardware::MQDescriptorSync& requestChannel, const android::hardware::MQDescriptorSync& resultChannel, configureExecutionBurst_cb cb) override; template Return ExecuteSynchronously(const V1_3::Request& request, CallbackContext cbCtx); /// execute the graph prepared from the request template bool ExecuteGraph(std::shared_ptr>& pMemPools, armnn::InputTensors& inputTensors, armnn::OutputTensors& outputTensors, CallbackContext callback); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(); private: Return Execute(const V1_3::Request& request, MeasureTiming measureTiming, CallbackAsync_1_3 callback); Return PrepareMemoryForInputs( armnn::InputTensors& inputs, const V1_3::Request& request, const std::vector& memPools); Return PrepareMemoryForOutputs( armnn::OutputTensors& outputs, std::vector &outputShapes, const V1_3::Request& request, const std::vector& memPools); std::tuple, Timing, std::string> PrepareMemoryForIO( armnn::InputTensors& inputs, armnn::OutputTensors& outputs, std::vector& memPools, const V1_3::Request& request); template void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); armnn::NetworkId m_NetworkId; armnn::IRuntime* m_Runtime; V1_3::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here static RequestThread m_RequestThread; uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; }; }