diff options
-rw-r--r-- | ArmnnPreparedModel.cpp | 22 | ||||
-rw-r--r-- | ArmnnPreparedModel.hpp | 22 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_2.cpp | 272 | ||||
-rw-r--r-- | ArmnnPreparedModel_1_2.hpp | 35 | ||||
-rw-r--r-- | RequestThread.cpp | 34 | ||||
-rw-r--r-- | RequestThread.hpp | 12 |
6 files changed, 313 insertions, 84 deletions
diff --git a/ArmnnPreparedModel.cpp b/ArmnnPreparedModel.cpp index 3256836e..462970aa 100644 --- a/ArmnnPreparedModel.cpp +++ b/ArmnnPreparedModel.cpp @@ -87,9 +87,8 @@ using namespace android::hardware; namespace armnn_driver { - template<typename HalVersion> -RequestThread<ArmnnPreparedModel, HalVersion> ArmnnPreparedModel<HalVersion>::m_RequestThread; +RequestThread<ArmnnPreparedModel, HalVersion, ArmnnCallback_1_0> ArmnnPreparedModel<HalVersion>::m_RequestThread; template<typename HalVersion> template <typename TensorBindingCollection> @@ -218,10 +217,17 @@ Return<ErrorStatus> ArmnnPreparedModel<HalVersion>::execute(const Request& reque } ALOGV("ArmnnPreparedModel::execute(...) before PostMsg"); + + auto cb = [callback](ErrorStatus errorStatus, std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, callingFunction); + }; + + ArmnnCallback_1_0 armnnCb; + armnnCb.callback = cb; // post the request for asynchronous execution - m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback); + m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb); ALOGV("ArmnnPreparedModel::execute(...) after PostMsg"); - return ErrorStatus::NONE; // successfully queued } @@ -230,7 +236,7 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, std::shared_ptr<armnn::InputTensors>& pInputTensors, std::shared_ptr<armnn::OutputTensors>& pOutputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback) + ArmnnCallback_1_0 cb) { ALOGV("ArmnnPreparedModel::ExecuteGraph(...)"); @@ -243,14 +249,14 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( if (status != armnn::Status::Success) { ALOGW("EnqueueWorkload failed"); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph"); return; } } catch (armnn::Exception& e) { ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what()); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel::ExecuteGraph"); return; } @@ -264,7 +270,7 @@ void ArmnnPreparedModel<HalVersion>::ExecuteGraph( pool.update(); } - NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph"); + cb.callback(ErrorStatus::NONE, "ExecuteGraph"); } template<typename HalVersion> diff --git a/ArmnnPreparedModel.hpp b/ArmnnPreparedModel.hpp index 275af316..33be972f 100644 --- a/ArmnnPreparedModel.hpp +++ b/ArmnnPreparedModel.hpp @@ -17,6 +17,12 @@ namespace armnn_driver { +using armnnExecuteCallback_1_0 = std::function<void(V1_0::ErrorStatus status, std::string callingFunction)>; + +struct ArmnnCallback_1_0 +{ + armnnExecuteCallback_1_0 callback; +}; template <typename HalVersion> class ArmnnPreparedModel : public V1_0::IPreparedModel @@ -39,7 +45,7 @@ public: void ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, std::shared_ptr<armnn::InputTensors>& pInputTensors, std::shared_ptr<armnn::OutputTensors>& pOutputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback); + ArmnnCallback_1_0 callback); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true @@ -49,15 +55,15 @@ private: template <typename TensorBindingCollection> void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - HalModel m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + HalModel m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread<ArmnnPreparedModel, HalVersion> m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; - const bool m_GpuProfilingEnabled; + static RequestThread<ArmnnPreparedModel, HalVersion, ArmnnCallback_1_0> m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; }; } diff --git a/ArmnnPreparedModel_1_2.cpp b/ArmnnPreparedModel_1_2.cpp index f6b44621..a7997c72 100644 --- a/ArmnnPreparedModel_1_2.cpp +++ b/ArmnnPreparedModel_1_2.cpp @@ -37,7 +37,10 @@ unsigned long MicrosecondsDuration(TimePoint endPoint, TimePoint startPoint) endPoint - startPoint).count()); } -void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, ErrorStatus errorStatus, +void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callback, + ErrorStatus errorStatus, + std::vector<OutputShape>, + const Timing, std::string callingFunction) { Return<void> returned = callback->notify(errorStatus); @@ -49,10 +52,13 @@ void NotifyCallbackAndCheck(const ::android::sp<V1_0::IExecutionCallback>& callb } } -void NotifyCallbackAndCheck(const ::android::sp<V1_2::IExecutionCallback>& callback, ErrorStatus errorStatus, +void NotifyCallbackAndCheck(const ::android::sp<V1_2::IExecutionCallback>& callback, + ErrorStatus errorStatus, + std::vector<OutputShape> outputShapes, + const Timing timing, std::string callingFunction) { - Return<void> returned = callback->notify(errorStatus); + Return<void> returned = callback->notify_1_2(errorStatus, outputShapes, timing); // This check is required, if the callback fails and it isn't checked it will bring down the service if (!returned.isOk()) { @@ -111,7 +117,8 @@ namespace armnn_driver { template<typename HalVersion> -RequestThread<ArmnnPreparedModel_1_2, HalVersion> ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread; +RequestThread<ArmnnPreparedModel_1_2, HalVersion, ArmnnCallback_1_2> + ArmnnPreparedModel_1_2<HalVersion>::m_RequestThread; template<typename HalVersion> template<typename TensorBindingCollection> @@ -165,15 +172,43 @@ template<typename HalVersion> Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute(const Request& request, const ::android::sp<V1_0::IExecutionCallback>& callback) { - return Execute<V1_0::IExecutionCallback>(request, callback); + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed"); + return ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](ErrorStatus errorStatus, + std::vector<OutputShape> outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return Execute(request, MeasureTiming::NO, cb); } template<typename HalVersion> Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::execute_1_2(const Request& request, - MeasureTiming, + MeasureTiming measureTiming, const sp<V1_2::IExecutionCallback>& callback) { - return Execute<V1_2::IExecutionCallback>(request, callback); + if (callback.get() == nullptr) + { + ALOGE("ArmnnPreparedModel_1_2::execute_1_2 invalid callback passed"); + return ErrorStatus::INVALID_ARGUMENT; + } + + auto cb = [callback](ErrorStatus errorStatus, + std::vector<OutputShape> outputShapes, + const Timing& timing, + std::string callingFunction) + { + NotifyCallbackAndCheck(callback, errorStatus, outputShapes, timing, callingFunction); + }; + + return Execute(request, measureTiming, cb); } template<typename HalVersion> @@ -217,8 +252,8 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming); return Void(); } + std::vector<OutputShape> outputShapes(request.outputs.size()); - // add the inputs and outputs with their data try { pInputTensors->reserve(request.inputs.size()); @@ -238,8 +273,8 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ pInputTensors->emplace_back(i, inputTensor); } - pOutputTensors->reserve(request.outputs.size()); + for (unsigned int i = 0; i < request.outputs.size(); i++) { const auto& outputArg = request.outputs[i]; @@ -253,6 +288,28 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ cb(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming); return Void(); } + const size_t outputSize = outputTensorInfo.GetNumBytes(); + const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size(); + + hidl_vec<uint32_t> dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = bufferSize >= outputSize; + + if (bufferSize < outputSize) + { + ALOGW("ArmnnPreparedModel_1_2::Execute failed"); + cb(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, outputShapes, g_NoTiming); + return Void(); + } pOutputTensors->emplace_back(i, outputTensor); } @@ -314,15 +371,82 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::executeSynchronously(const Requ timing.timeInDriver = MicrosecondsDuration(driverEnd, driverStart); ALOGV("ArmnnPreparedModel_1_2::executeSynchronously timing Device = %lu Driver = %lu", timing.timeOnDevice, timing.timeInDriver); - cb(ErrorStatus::NONE, {}, timing); + cb(ErrorStatus::NONE, outputShapes, timing); } else { - cb(ErrorStatus::NONE, {}, g_NoTiming); + cb(ErrorStatus::NONE, outputShapes, g_NoTiming); } return Void(); } +class ArmnnBurstExecutorWithCache : public ExecutionBurstServer::IBurstExecutorWithCache { +public: + ArmnnBurstExecutorWithCache(IPreparedModel* preparedModel) + : m_PreparedModel(preparedModel) + {} + + bool isCacheEntryPresent(int32_t slot) const override + { + const auto it = m_MemoryCache.find(slot); + return (it != m_MemoryCache.end()) && it->second.valid(); + } + + void addCacheEntry(const hidl_memory& memory, int32_t slot) override + { + m_MemoryCache[slot] = memory; + } + + void removeCacheEntry(int32_t slot) override + { + m_MemoryCache.erase(slot); + } + + std::tuple<ErrorStatus, hidl_vec<OutputShape>, Timing> execute( + const Request& request, const std::vector<int32_t>& slots, + MeasureTiming measure) override + { + ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache::execute"); + hidl_vec<hidl_memory> pools(slots.size()); + + std::transform(slots.begin(), slots.end(), pools.begin(), [this](int32_t slot) + { + return m_MemoryCache[slot]; + }); + + Request fullRequest = request; + fullRequest.pools = std::move(pools); + + // Setup Callback + ErrorStatus returnedStatus = ErrorStatus::GENERAL_FAILURE; + hidl_vec<OutputShape> returnedOutputShapes; + Timing returnedTiming; + auto cb = [&returnedStatus, &returnedOutputShapes, &returnedTiming](ErrorStatus status, + const hidl_vec<OutputShape>& outputShapes, + const Timing& timing) + { + returnedStatus = status; + returnedOutputShapes = outputShapes; + returnedTiming = timing; + }; + + // Execute + ALOGV("ArmnnPreparedModel_1_2::BurstExecutorWithCache executing"); + const Return<void> ret = m_PreparedModel->executeSynchronously(fullRequest, measure, cb); + + if (!ret.isOk() || returnedStatus != ErrorStatus::NONE) + { + ALOGE("ArmnnPreparedModel_1_2::BurstExecutorWithCache::error executing"); + } + return std::make_tuple(returnedStatus, std::move(returnedOutputShapes), returnedTiming); + } + +private: + IPreparedModel* const m_PreparedModel; + std::map<int, hidl_memory> m_MemoryCache; +}; + + template<typename HalVersion> Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst( const sp<V1_2::IBurstCallback>& callback, @@ -331,7 +455,12 @@ Return<void> ArmnnPreparedModel_1_2<HalVersion>::configureExecutionBurst( V1_2::IPreparedModel::configureExecutionBurst_cb cb) { ALOGV("ArmnnPreparedModel_1_2::configureExecutionBurst"); - const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(callback, requestChannel, resultChannel, this); + const std::shared_ptr<ArmnnBurstExecutorWithCache> executorWithCache = + std::make_shared<ArmnnBurstExecutorWithCache>(this); + const sp<V1_2::IBurstContext> burst = ExecutionBurstServer::create(callback, + requestChannel, + resultChannel, + executorWithCache); if (burst == nullptr) { @@ -349,27 +478,64 @@ void ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph( std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, std::shared_ptr<armnn::InputTensors>& pInputTensors, std::shared_ptr<armnn::OutputTensors>& pOutputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback) + ArmnnCallback_1_2 cb) { ALOGV("ArmnnPreparedModel_1_2::ExecuteGraph(...)"); + TimePoint driverEnd, deviceStart, deviceEnd; + DumpTensorsIfRequired("Input", *pInputTensors); + std::vector<std::pair<int, armnn::Tensor> > outputTensors = *pOutputTensors.get(); + std::vector<OutputShape> outputShapes(outputTensors.size()); + + for (unsigned int i = 0; i < outputTensors.size(); i++) + { + std::pair<int, armnn::Tensor> outputTensorPair = outputTensors[i]; + const armnn::Tensor outputTensor = outputTensorPair.second; + const armnn::TensorInfo outputTensorInfo = outputTensor.GetInfo(); + + hidl_vec<uint32_t> dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = true; + } + // run it try { + if (cb.measureTiming == MeasureTiming::YES) + { + deviceStart = Now(); + } + armnn::Status status = m_Runtime->EnqueueWorkload(m_NetworkId, *pInputTensors, *pOutputTensors); + + if (cb.measureTiming == MeasureTiming::YES) + { + deviceEnd = Now(); + } if (status != armnn::Status::Success) { ALOGW("EnqueueWorkload failed"); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, + "ArmnnPreparedModel_1_2::ExecuteGraph"); return; } } catch (armnn::Exception& e) { ALOGW("armnn::Exception caught from EnqueueWorkload: %s", e.what()); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::ExecuteGraph"); + cb.callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, + "ArmnnPreparedModel_1_2::ExecuteGraph"); return; } @@ -383,7 +549,16 @@ void ArmnnPreparedModel_1_2<HalVersion>::ExecuteGraph( pool.update(); } - NotifyCallbackAndCheck(callback, ErrorStatus::NONE, "ExecuteGraph"); + if (cb.measureTiming == MeasureTiming::YES) + { + driverEnd = Now(); + Timing timing; + timing.timeOnDevice = MicrosecondsDuration(deviceEnd, deviceStart); + timing.timeInDriver = MicrosecondsDuration(driverEnd, cb.driverStart); + cb.callback(ErrorStatus::NONE, outputShapes, timing, "ExecuteGraph"); + } else { + cb.callback(ErrorStatus::NONE, outputShapes, g_NoTiming, "ExecuteGraph"); + } } template<typename HalVersion> @@ -428,28 +603,29 @@ bool ArmnnPreparedModel_1_2<HalVersion>::ExecuteWithDummyInputs() } template<typename HalVersion> -template<typename ExecutionCallback> Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& request, - const sp<ExecutionCallback>& callback) + MeasureTiming measureTiming, + armnnExecuteCallback_1_2 callback) { - ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); - m_RequestCount++; + TimePoint driverStart; - if (callback.get() == nullptr) + if (measureTiming == MeasureTiming::YES) { - ALOGE("ArmnnPreparedModel_1_2::execute invalid callback passed"); - return ErrorStatus::INVALID_ARGUMENT; + driverStart = Now(); } + ALOGV("ArmnnPreparedModel_1_2::execute(): %s", GetModelSummary(m_Model).c_str()); + m_RequestCount++; + if (!android::nn::validateRequest(request, m_Model)) { - NotifyCallbackAndCheck(callback, ErrorStatus::INVALID_ARGUMENT, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::INVALID_ARGUMENT, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::INVALID_ARGUMENT; } if (!m_RequestInputsAndOutputsDumpDir.empty()) { - ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(callback.get())); + ALOGD("Dumping inputs and outputs for request %" PRIuPTR, reinterpret_cast<std::uintptr_t>(&callback)); } // allocate the tensors on the heap, as they are passed to the request thread @@ -462,7 +638,7 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& if (!setRunTimePoolInfosFromHidlMemories(pMemPools.get(), request.pools)) { - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } @@ -480,8 +656,7 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& if (inputTensor.GetMemoryArea() == nullptr) { ALOGE("Cannot execute request. Error converting request input %u to tensor", i); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, - "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } @@ -489,6 +664,8 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& } pOutputTensors->reserve(request.outputs.size()); + std::vector<OutputShape> outputShapes(request.outputs.size()); + for (unsigned int i = 0; i < request.outputs.size(); i++) { const auto& outputArg = request.outputs[i]; @@ -496,33 +673,58 @@ Return <ErrorStatus> ArmnnPreparedModel_1_2<HalVersion>::Execute(const Request& const armnn::TensorInfo outputTensorInfo = m_Runtime->GetOutputTensorInfo(m_NetworkId, i); const armnn::Tensor outputTensor = GetTensorForRequestArgument(outputArg, outputTensorInfo, *pMemPools); if (outputTensor.GetMemoryArea() == nullptr) - { ALOGE("Cannot execute request. Error converting request output %u to tensor", i); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, - "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } + const size_t outputSize = outputTensorInfo.GetNumBytes(); + const size_t bufferSize = pMemPools->at(outputArg.location.poolIndex).getHidlMemory().size(); pOutputTensors->emplace_back(i, outputTensor); + + hidl_vec<uint32_t> dimensions; + + armnn::TensorShape tensorShape = outputTensorInfo.GetShape(); + const unsigned int numDims = tensorShape.GetNumDimensions(); + dimensions.resize(numDims); + + for (unsigned int outputIdx = 0u; outputIdx < numDims; ++outputIdx) + { + dimensions[outputIdx] = tensorShape[outputIdx]; + } + outputShapes[i].dimensions = dimensions; + outputShapes[i].isSufficient = bufferSize >= outputSize; + + if (bufferSize < outputSize) + { + ALOGW("ArmnnPreparedModel_1_2::Execute failed"); + callback(ErrorStatus::OUTPUT_INSUFFICIENT_SIZE, + outputShapes, + g_NoTiming, + "ArmnnPreparedModel_1_2::Execute"); + return ErrorStatus::NONE; + } } } catch (armnn::Exception& e) { ALOGW("armnn::Exception caught while preparing for EnqueueWorkload: %s", e.what()); - NotifyCallbackAndCheck(callback, ErrorStatus::GENERAL_FAILURE, "ArmnnPreparedModel_1_2::execute"); + callback(ErrorStatus::GENERAL_FAILURE, {}, g_NoTiming, "ArmnnPreparedModel_1_2::execute"); return ErrorStatus::GENERAL_FAILURE; } ALOGV("ArmnnPreparedModel_1_2::execute(...) before PostMsg"); // post the request for asynchronous execution - m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, callback); + ArmnnCallback_1_2 armnnCb; + armnnCb.callback = callback; + armnnCb.measureTiming = measureTiming; + armnnCb.driverStart = driverStart; + m_RequestThread.PostMsg(this, pMemPools, pInputTensors, pOutputTensors, armnnCb); ALOGV("ArmnnPreparedModel_1_2::execute(...) after PostMsg"); - return ErrorStatus::NONE; } - #ifdef ARMNN_ANDROID_NN_V1_2 template class ArmnnPreparedModel_1_2<hal_1_2::HalPolicy>; #endif diff --git a/ArmnnPreparedModel_1_2.hpp b/ArmnnPreparedModel_1_2.hpp index 4e883b6b..b97895e8 100644 --- a/ArmnnPreparedModel_1_2.hpp +++ b/ArmnnPreparedModel_1_2.hpp @@ -19,6 +19,18 @@ namespace armnn_driver { +typedef std::function<void(::android::hardware::neuralnetworks::V1_0::ErrorStatus status, + std::vector<::android::hardware::neuralnetworks::V1_2::OutputShape> outputShapes, + const ::android::hardware::neuralnetworks::V1_2::Timing& timing, + std::string callingFunction)> armnnExecuteCallback_1_2; + +struct ArmnnCallback_1_2 +{ + armnnExecuteCallback_1_2 callback; + TimePoint driverStart; + MeasureTiming measureTiming; +}; + template <typename HalVersion> class ArmnnPreparedModel_1_2 : public V1_2::IPreparedModel { @@ -34,7 +46,7 @@ public: virtual ~ArmnnPreparedModel_1_2(); virtual Return<ErrorStatus> execute(const Request& request, - const ::android::sp<V1_0::IExecutionCallback>& callback) override; + const sp<V1_0::IExecutionCallback>& callback) override; virtual Return<ErrorStatus> execute_1_2(const Request& request, MeasureTiming measure, const sp<V1_2::IExecutionCallback>& callback) override; @@ -53,28 +65,29 @@ public: void ExecuteGraph(std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& pMemPools, std::shared_ptr<armnn::InputTensors>& pInputTensors, std::shared_ptr<armnn::OutputTensors>& pOutputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback); + ArmnnCallback_1_2 callbackDescriptor); /// Executes this model with dummy inputs (e.g. all zeroes). /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(); private: - template <typename ExecutionCallback> - Return <ErrorStatus> Execute(const Request &request, const sp <ExecutionCallback> &callback); + Return <ErrorStatus> Execute(const Request& request, + MeasureTiming measureTiming, + armnnExecuteCallback_1_2 callback); template <typename TensorBindingCollection> void DumpTensorsIfRequired(char const* tensorNamePrefix, const TensorBindingCollection& tensorBindings); - armnn::NetworkId m_NetworkId; - armnn::IRuntime* m_Runtime; - V1_2::Model m_Model; + armnn::NetworkId m_NetworkId; + armnn::IRuntime* m_Runtime; + V1_2::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread<ArmnnPreparedModel_1_2, HalVersion> m_RequestThread; - uint32_t m_RequestCount; - const std::string& m_RequestInputsAndOutputsDumpDir; - const bool m_GpuProfilingEnabled; + static RequestThread<ArmnnPreparedModel_1_2, HalVersion, ArmnnCallback_1_2> m_RequestThread; + uint32_t m_RequestCount; + const std::string& m_RequestInputsAndOutputsDumpDir; + const bool m_GpuProfilingEnabled; }; } diff --git a/RequestThread.cpp b/RequestThread.cpp index 4b646034..052c5c11 100644 --- a/RequestThread.cpp +++ b/RequestThread.cpp @@ -21,15 +21,15 @@ using namespace android; namespace armnn_driver { -template <template <typename HalVersion> class PreparedModel, typename HalVersion> -RequestThread<PreparedModel, HalVersion>::RequestThread() +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> +RequestThread<PreparedModel, HalVersion, Callback>::RequestThread() { ALOGV("RequestThread::RequestThread()"); m_Thread = std::make_unique<std::thread>(&RequestThread::Process, this); } -template <template <typename HalVersion> class PreparedModel, typename HalVersion> -RequestThread<PreparedModel, HalVersion>::~RequestThread() +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> +RequestThread<PreparedModel, HalVersion, Callback>::~RequestThread() { ALOGV("RequestThread::~RequestThread()"); @@ -54,12 +54,12 @@ RequestThread<PreparedModel, HalVersion>::~RequestThread() catch (const std::exception&) { } // Swallow any exception. } -template <template <typename HalVersion> class PreparedModel, typename HalVersion> -void RequestThread<PreparedModel, HalVersion>::PostMsg(PreparedModel<HalVersion>* model, +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> +void RequestThread<PreparedModel, HalVersion, Callback>::PostMsg(PreparedModel<HalVersion>* model, std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, std::shared_ptr<armnn::InputTensors>& inputTensors, std::shared_ptr<armnn::OutputTensors>& outputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback) + Callback callback) { ALOGV("RequestThread::PostMsg(...)"); auto data = std::make_shared<AsyncExecuteData>(model, @@ -71,8 +71,8 @@ void RequestThread<PreparedModel, HalVersion>::PostMsg(PreparedModel<HalVersion> PostMsg(pMsg); } -template <template <typename HalVersion> class PreparedModel, typename HalVersion> -void RequestThread<PreparedModel, HalVersion>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg) +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> +void RequestThread<PreparedModel, HalVersion, Callback>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg) { ALOGV("RequestThread::PostMsg(pMsg)"); // Add a message to the queue and notify the request thread @@ -81,8 +81,8 @@ void RequestThread<PreparedModel, HalVersion>::PostMsg(std::shared_ptr<ThreadMsg m_Cv.notify_one(); } -template <template <typename HalVersion> class PreparedModel, typename HalVersion> -void RequestThread<PreparedModel, HalVersion>::Process() +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> +void RequestThread<PreparedModel, HalVersion, Callback>::Process() { ALOGV("RequestThread::Process()"); while (true) @@ -111,7 +111,7 @@ void RequestThread<PreparedModel, HalVersion>::Process() model->ExecuteGraph(pMsg->data->m_MemPools, pMsg->data->m_InputTensors, pMsg->data->m_OutputTensors, - pMsg->data->m_callback); + pMsg->data->m_Callback); break; } @@ -139,16 +139,16 @@ void RequestThread<PreparedModel, HalVersion>::Process() /// Class template specializations /// -template class RequestThread<ArmnnPreparedModel, hal_1_0::HalPolicy>; +template class RequestThread<ArmnnPreparedModel, hal_1_0::HalPolicy, ArmnnCallback_1_0>; #ifdef ARMNN_ANDROID_NN_V1_1 -template class RequestThread<armnn_driver::ArmnnPreparedModel, hal_1_1::HalPolicy>; +template class RequestThread<armnn_driver::ArmnnPreparedModel, hal_1_1::HalPolicy, ArmnnCallback_1_0>; #endif #ifdef ARMNN_ANDROID_NN_V1_2 -template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy>; -template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy>; -template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy>; +template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy, ArmnnCallback_1_0>; +template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy, ArmnnCallback_1_0>; +template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy, ArmnnCallback_1_2>; #endif } // namespace armnn_driver diff --git a/RequestThread.hpp b/RequestThread.hpp index dc1b535a..253d104c 100644 --- a/RequestThread.hpp +++ b/RequestThread.hpp @@ -18,8 +18,10 @@ namespace armnn_driver { +using TimePoint = std::chrono::steady_clock::time_point; +static const TimePoint g_Min = std::chrono::steady_clock::time_point::min(); -template<template <typename HalVersion> class PreparedModel, typename HalVersion> +template<template <typename HalVersion> class PreparedModel, typename HalVersion, typename Callback> class RequestThread { public: @@ -39,7 +41,7 @@ public: std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, std::shared_ptr<armnn::InputTensors>& inputTensors, std::shared_ptr<armnn::OutputTensors>& outputTensors, - const ::android::sp<V1_0::IExecutionCallback>& callback); + Callback callback); private: RequestThread(const RequestThread&) = delete; @@ -52,12 +54,12 @@ private: std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, std::shared_ptr<armnn::InputTensors>& inputTensors, std::shared_ptr<armnn::OutputTensors>& outputTensors, - const ::android::sp<V1_0::IExecutionCallback>& cb) + Callback callback) : m_Model(model) , m_MemPools(memPools) , m_InputTensors(inputTensors) , m_OutputTensors(outputTensors) - , m_callback(cb) + , m_Callback(callback) { } @@ -65,7 +67,7 @@ private: std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools; std::shared_ptr<armnn::InputTensors> m_InputTensors; std::shared_ptr<armnn::OutputTensors> m_OutputTensors; - const ::android::sp<V1_0::IExecutionCallback> m_callback; + Callback m_Callback; }; enum class ThreadMsgType |