diff options
-rw-r--r-- | 1.3/ArmnnDriver.hpp        |   3
-rw-r--r-- | 1.3/ArmnnDriverImpl.cpp    |   6
-rw-r--r-- | 1.3/ArmnnDriverImpl.hpp    |   3
-rw-r--r-- | Android.mk                 |   7
-rw-r--r-- | ArmnnPreparedModel_1_3.cpp |  12
-rw-r--r-- | ArmnnPreparedModel_1_3.hpp |  10
-rw-r--r-- | RequestThread.cpp          |   1
-rw-r--r-- | RequestThread_1_3.cpp      | 193
-rw-r--r-- | RequestThread_1_3.hpp      | 106
9 files changed, 331 insertions, 10 deletions
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp index 798c4381..b6b55fae 100644 --- a/1.3/ArmnnDriver.hpp +++ b/1.3/ArmnnDriver.hpp @@ -206,7 +206,8 @@ public: model, cb, model.relaxComputationFloat32toFloat16 - && m_Options.GetFp16Enabled()); + && m_Options.GetFp16Enabled(), + priority); } Return<void> getSupportedExtensions(getSupportedExtensions_cb cb) diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp index 4b2ff148..6168c9d0 100644 --- a/1.3/ArmnnDriverImpl.cpp +++ b/1.3/ArmnnDriverImpl.cpp @@ -101,7 +101,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( const DriverOptions& options, const V1_3::Model& model, const sp<V1_3::IPreparedModelCallback>& cb, - bool float32ToFloat16) + bool float32ToFloat16, + V1_3::Priority priority) { ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()"); @@ -204,7 +205,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3( runtime.get(), model, options.GetRequestInputsAndOutputsDumpDir(), - options.IsGpuProfilingEnabled())); + options.IsGpuProfilingEnabled(), + priority)); // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. 
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp index 8a665ea5..2b39d4e0 100644 --- a/1.3/ArmnnDriverImpl.hpp +++ b/1.3/ArmnnDriverImpl.hpp @@ -30,7 +30,8 @@ public: const DriverOptions& options, const V1_3::Model& model, const android::sp<V1_3::IPreparedModelCallback>& cb, - bool float32ToFloat16 = false); + bool float32ToFloat16 = false, + V1_3::Priority priority = V1_3::Priority::MEDIUM); static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime, V1_3::IDevice::getCapabilities_1_3_cb cb); @@ -72,6 +72,9 @@ ifeq ($(ARMNN_LIBOPENCL),0) ARMNN_INCLUDE_LIBOPENCL := 0 endif +# Variable to control retire rate of priority queue +RETIRE_RATE := 3 + ####################### # libarmnn-driver@1.0 # ####################### @@ -486,6 +489,9 @@ LOCAL_CFLAGS += \ -DARMNNREF_ENABLED endif # ARMNN_REF_ENABLED == 1 +LOCAL_CFLAGS += \ + -DRETIRE_RATE=$(RETIRE_RATE) + LOCAL_SRC_FILES := \ 1.0/ArmnnDriverImpl.cpp \ 1.0/HalPolicy.cpp \ @@ -504,6 +510,7 @@ LOCAL_SRC_FILES := \ DriverOptions.cpp \ ModelToINetworkConverter.cpp \ RequestThread.cpp \ + RequestThread_1_3.cpp \ Utils.cpp LOCAL_STATIC_LIBRARIES := \ diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index 1fb03f44..c7adc6c4 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -138,7 +138,7 @@ namespace armnn_driver { template<typename HalVersion> -RequestThread<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> +RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread; template<typename HalVersion> @@ -164,13 +164,15 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw armnn::IRuntime* runtime, const V1_3::Model& model, const std::string& requestInputsAndOutputsDumpDir, - const bool gpuProfilingEnabled) + const bool gpuProfilingEnabled, + V1_3::Priority priority) : m_NetworkId(networkId) , m_Runtime(runtime) , m_Model(model) , m_RequestCount(0) , 
m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) , m_GpuProfilingEnabled(gpuProfilingEnabled) + , m_ModelPriority(priority) { // Enable profiling if required. m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); @@ -830,6 +832,12 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_ return V1_3::ErrorStatus::NONE; } +template<typename HalVersion> +V1_3::Priority ArmnnPreparedModel_1_3<HalVersion>::GetModelPriority() +{ + return m_ModelPriority; +} + #ifdef ARMNN_ANDROID_NN_V1_3 template class ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>; template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>( diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp index 4dd15c17..5010bbdb 100644 --- a/ArmnnPreparedModel_1_3.hpp +++ b/ArmnnPreparedModel_1_3.hpp @@ -7,7 +7,7 @@ #include "ArmnnDriver.hpp" #include "ArmnnDriverImpl.hpp" -#include "RequestThread.hpp" +#include "RequestThread_1_3.hpp" #include "ModelToINetworkConverter.hpp" #include <NeuralNetworks.h> @@ -50,7 +50,8 @@ public: armnn::IRuntime* runtime, const HalModel& model, const std::string& requestInputsAndOutputsDumpDir, - const bool gpuProfilingEnabled); + const bool gpuProfilingEnabled, + V1_3::Priority priority = V1_3::Priority::MEDIUM); virtual ~ArmnnPreparedModel_1_3(); @@ -105,6 +106,8 @@ public: /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(); + V1_3::Priority GetModelPriority(); + private: Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request, MeasureTiming measureTiming, @@ -135,10 +138,11 @@ private: V1_3::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread; + static 
RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread; uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; + V1_3::Priority m_ModelPriority; }; } diff --git a/RequestThread.cpp b/RequestThread.cpp index a177b1a4..927af922 100644 --- a/RequestThread.cpp +++ b/RequestThread.cpp @@ -161,7 +161,6 @@ template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy, CallbackCon template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy, CallbackContext_1_0>; template class RequestThread<ArmnnPreparedModel, hal_1_3::HalPolicy, CallbackContext_1_0>; template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy, CallbackContext_1_2>; -template class RequestThread<ArmnnPreparedModel_1_3, hal_1_3::HalPolicy, CallbackContext_1_3>; #endif } // namespace armnn_driver diff --git a/RequestThread_1_3.cpp b/RequestThread_1_3.cpp new file mode 100644 index 00000000..59fa70ed --- /dev/null +++ b/RequestThread_1_3.cpp @@ -0,0 +1,193 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#define LOG_TAG "ArmnnDriver" + +#include "RequestThread_1_3.hpp" + +#include "ArmnnPreparedModel_1_3.hpp" + +#include <armnn/utility/Assert.hpp> + +#include <log/log.h> + +using namespace android; + +namespace armnn_driver +{ + +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::RequestThread_1_3() +{ + ALOGV("RequestThread_1_3::RequestThread_1_3()"); + m_Thread = std::make_unique<std::thread>(&RequestThread_1_3::Process, this); +} + +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::~RequestThread_1_3() +{ + ALOGV("RequestThread_1_3::~RequestThread_1_3()"); + + try + { + // Coverity fix: The following code may throw an exception of type std::length_error. + + // This code is meant to terminate the inner thread gracefully by posting an EXIT message + // to the thread's message queue. However, according to Coverity, this code could throw an exception and fail. + // Since only one static instance of RequestThread is used in the driver (in ArmnnPreparedModel), + // this destructor is called only when the application has been closed, which means that + // the inner thread will be terminated anyway, although abruptly, in the event that the destructor code throws. + // Wrapping the destructor's code with a try-catch block simply fixes the Coverity bug. + + // Post an EXIT message to the thread + std::shared_ptr<AsyncExecuteData> nulldata(nullptr); + auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::EXIT, nulldata); + PostMsg(pMsg); + // Wait for the thread to terminate, it is deleted automatically + m_Thread->join(); + } + catch (const std::exception&) { } // Swallow any exception. 
+} + +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::PostMsg(PreparedModel<HalVersion>* model, + std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, + std::shared_ptr<armnn::InputTensors>& inputTensors, + std::shared_ptr<armnn::OutputTensors>& outputTensors, + CallbackContext callbackContext) +{ + ALOGV("RequestThread_1_3::PostMsg(...)"); + auto data = std::make_shared<AsyncExecuteData>(model, + memPools, + inputTensors, + outputTensors, + callbackContext); + auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::REQUEST, data); + PostMsg(pMsg, model->GetModelPriority()); +} + +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg, + V1_3::Priority priority) +{ + ALOGV("RequestThread_1_3::PostMsg(pMsg)"); + // Add a message to the queue and notify the request thread + std::unique_lock<std::mutex> lock(m_Mutex); + switch (priority) { + case V1_3::Priority::HIGH: + m_HighPriorityQueue.push(pMsg); + break; + case V1_3::Priority::LOW: + m_LowPriorityQueue.push(pMsg); + break; + case V1_3::Priority::MEDIUM: + default: + m_MediumPriorityQueue.push(pMsg); + } + m_Cv.notify_one(); +} + +template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::Process() +{ + ALOGV("RequestThread_1_3::Process()"); + int retireRate = RETIRE_RATE; + int highPriorityCount = 0; + int mediumPriorityCount = 0; + while (true) + { + std::shared_ptr<ThreadMsg> pMsg(nullptr); + { + // Wait for a message to be added to the queue + // This is in a separate scope to minimise the lifetime of the lock + std::unique_lock<std::mutex> lock(m_Mutex); + while 
(m_HighPriorityQueue.empty() && m_MediumPriorityQueue.empty() && m_LowPriorityQueue.empty()) + { + m_Cv.wait(lock); + } + // Get the message to process from the front of each queue based on priority from high to low + // Get high priority first if it does not exceed the retire rate + if (!m_HighPriorityQueue.empty() && highPriorityCount < retireRate) + { + pMsg = m_HighPriorityQueue.front(); + m_HighPriorityQueue.pop(); + highPriorityCount += 1; + } + // If high priority queue is empty or the count exceeds the retire rate, get medium priority message + else if (!m_MediumPriorityQueue.empty() && mediumPriorityCount < retireRate) + { + pMsg = m_MediumPriorityQueue.front(); + m_MediumPriorityQueue.pop(); + mediumPriorityCount += 1; + // Reset high priority count + highPriorityCount = 0; + } + // If medium priority queue is empty or the count exceeds the retire rate, get low priority message + else if (!m_LowPriorityQueue.empty()) + { + pMsg = m_LowPriorityQueue.front(); + m_LowPriorityQueue.pop(); + // Reset high and medium priority count + highPriorityCount = 0; + mediumPriorityCount = 0; + } + else + { + // Reset high and medium priority count + highPriorityCount = 0; + mediumPriorityCount = 0; + continue; + } + } + + switch (pMsg->type) + { + case ThreadMsgType::REQUEST: + { + ALOGV("RequestThread_1_3::Process() - request"); + // invoke the asynchronous execution method + PreparedModel<HalVersion>* model = pMsg->data->m_Model; + model->ExecuteGraph(pMsg->data->m_MemPools, + *(pMsg->data->m_InputTensors), + *(pMsg->data->m_OutputTensors), + pMsg->data->m_CallbackContext); + break; + } + + case ThreadMsgType::EXIT: + { + ALOGV("RequestThread_1_3::Process() - exit"); + // delete all remaining messages (there should not be any) + std::unique_lock<std::mutex> lock(m_Mutex); + while (!m_HighPriorityQueue.empty()) + { + m_HighPriorityQueue.pop(); + } + while (!m_MediumPriorityQueue.empty()) + { + m_MediumPriorityQueue.pop(); + } + while (!m_LowPriorityQueue.empty()) + { + 
m_LowPriorityQueue.pop(); + } + return; + } + + default: + // this should be unreachable + ALOGE("RequestThread_1_3::Process() - invalid message type"); + ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread_1_3: invalid message type"); + } + } +} + +/// +/// Class template specializations +/// + +template class RequestThread_1_3<ArmnnPreparedModel_1_3, hal_1_3::HalPolicy, CallbackContext_1_3>; + +} // namespace armnn_driver diff --git a/RequestThread_1_3.hpp b/RequestThread_1_3.hpp new file mode 100644 index 00000000..c8abc5e5 --- /dev/null +++ b/RequestThread_1_3.hpp @@ -0,0 +1,106 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include <queue> +#include <thread> +#include <mutex> +#include <condition_variable> + +#include "ArmnnDriver.hpp" +#include "ArmnnDriverImpl.hpp" + +#include <CpuExecutor.h> +#include <armnn/ArmNN.hpp> + +namespace armnn_driver +{ +using TimePoint = std::chrono::steady_clock::time_point; + +template<template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext> +class RequestThread_1_3 +{ +public: + /// Constructor creates the thread + RequestThread_1_3(); + + /// Destructor terminates the thread + ~RequestThread_1_3(); + + /// Add a message to the thread queue. 
+ /// @param[in] model pointer to the prepared model handling the request + /// @param[in] memPools pointer to the memory pools vector for the tensors + /// @param[in] inputTensors pointer to the input tensors for the request + /// @param[in] outputTensors pointer to the output tensors for the request + /// @param[in] callback the android notification callback + void PostMsg(PreparedModel<HalVersion>* model, + std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, + std::shared_ptr<armnn::InputTensors>& inputTensors, + std::shared_ptr<armnn::OutputTensors>& outputTensors, + CallbackContext callbackContext); + +private: + RequestThread_1_3(const RequestThread_1_3&) = delete; + RequestThread_1_3& operator=(const RequestThread_1_3&) = delete; + + /// storage for a prepared model and args for the asyncExecute call + struct AsyncExecuteData + { + AsyncExecuteData(PreparedModel<HalVersion>* model, + std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools, + std::shared_ptr<armnn::InputTensors>& inputTensors, + std::shared_ptr<armnn::OutputTensors>& outputTensors, + CallbackContext callbackContext) + : m_Model(model) + , m_MemPools(memPools) + , m_InputTensors(inputTensors) + , m_OutputTensors(outputTensors) + , m_CallbackContext(callbackContext) + { + } + + PreparedModel<HalVersion>* m_Model; + std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools; + std::shared_ptr<armnn::InputTensors> m_InputTensors; + std::shared_ptr<armnn::OutputTensors> m_OutputTensors; + CallbackContext m_CallbackContext; + }; + enum class ThreadMsgType + { + EXIT, // exit the thread + REQUEST // user request to process + }; + + /// storage for the thread message type and data + struct ThreadMsg + { + ThreadMsg(ThreadMsgType msgType, + std::shared_ptr<AsyncExecuteData>& msgData) + : type(msgType) + , data(msgData) + { + } + + ThreadMsgType type; + std::shared_ptr<AsyncExecuteData> data; + }; + + /// Add a prepared thread message to the thread queue. 
+ /// @param[in] threadMsg the message to add to the queue + void PostMsg(std::shared_ptr<ThreadMsg>& pThreadMsg, V1_3::Priority priority = V1_3::Priority::MEDIUM); + + /// Entry point for the request thread + void Process(); + + std::unique_ptr<std::thread> m_Thread; + std::queue<std::shared_ptr<ThreadMsg>> m_HighPriorityQueue; + std::queue<std::shared_ptr<ThreadMsg>> m_MediumPriorityQueue; + std::queue<std::shared_ptr<ThreadMsg>> m_LowPriorityQueue; + std::mutex m_Mutex; + std::condition_variable m_Cv; +}; + +} // namespace armnn_driver |