aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNarumol Prangnawarat <narumol.prangnawarat@arm.com>2020-06-02 12:07:43 +0100
committerNarumol Prangnawarat <narumol.prangnawarat@arm.com>2020-06-02 12:07:43 +0100
commitcad4e91027a29a62c210d422ce1c9130e46f2199 (patch)
tree5a7726242f7ee25ba6d5a549245ad52c63e4e902
parent24a9c5847a994eb4909aecd3c551f16a69050c39 (diff)
downloadandroid-nn-driver-cad4e91027a29a62c210d422ce1c9130e46f2199.tar.gz
IVGCVSW-4780 Add QoS to AndroidNNDriver
* Add model priority to ArmnnPreparedModel_1_3
* Add RequestThread_1_3 to allow execution based on priority
* Add RETIRE_RATE to Android.mk to be able to configure the retire rate

Signed-off-by: Narumol Prangnawarat <narumol.prangnawarat@arm.com>
Change-Id: Ic5f4309249b744c2a8f625c986eede381a26028b
-rw-r--r--1.3/ArmnnDriver.hpp3
-rw-r--r--1.3/ArmnnDriverImpl.cpp6
-rw-r--r--1.3/ArmnnDriverImpl.hpp3
-rw-r--r--Android.mk7
-rw-r--r--ArmnnPreparedModel_1_3.cpp12
-rw-r--r--ArmnnPreparedModel_1_3.hpp10
-rw-r--r--RequestThread.cpp1
-rw-r--r--RequestThread_1_3.cpp193
-rw-r--r--RequestThread_1_3.hpp106
9 files changed, 331 insertions, 10 deletions
diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp
index 798c4381..b6b55fae 100644
--- a/1.3/ArmnnDriver.hpp
+++ b/1.3/ArmnnDriver.hpp
@@ -206,7 +206,8 @@ public:
model,
cb,
model.relaxComputationFloat32toFloat16
- && m_Options.GetFp16Enabled());
+ && m_Options.GetFp16Enabled(),
+ priority);
}
Return<void> getSupportedExtensions(getSupportedExtensions_cb cb)
diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp
index 4b2ff148..6168c9d0 100644
--- a/1.3/ArmnnDriverImpl.cpp
+++ b/1.3/ArmnnDriverImpl.cpp
@@ -101,7 +101,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
const DriverOptions& options,
const V1_3::Model& model,
const sp<V1_3::IPreparedModelCallback>& cb,
- bool float32ToFloat16)
+ bool float32ToFloat16,
+ V1_3::Priority priority)
{
ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()");
@@ -204,7 +205,8 @@ Return<V1_3::ErrorStatus> ArmnnDriverImpl::prepareArmnnModel_1_3(
runtime.get(),
model,
options.GetRequestInputsAndOutputsDumpDir(),
- options.IsGpuProfilingEnabled()));
+ options.IsGpuProfilingEnabled(),
+ priority));
// Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if
// this is enabled) before the first 'real' inference which removes the overhead of the first inference.
diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp
index 8a665ea5..2b39d4e0 100644
--- a/1.3/ArmnnDriverImpl.hpp
+++ b/1.3/ArmnnDriverImpl.hpp
@@ -30,7 +30,8 @@ public:
const DriverOptions& options,
const V1_3::Model& model,
const android::sp<V1_3::IPreparedModelCallback>& cb,
- bool float32ToFloat16 = false);
+ bool float32ToFloat16 = false,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM);
static Return<void> getCapabilities_1_3(const armnn::IRuntimePtr& runtime,
V1_3::IDevice::getCapabilities_1_3_cb cb);
diff --git a/Android.mk b/Android.mk
index bf8bc65b..cf37efcd 100644
--- a/Android.mk
+++ b/Android.mk
@@ -72,6 +72,9 @@ ifeq ($(ARMNN_LIBOPENCL),0)
ARMNN_INCLUDE_LIBOPENCL := 0
endif
+# Variable to control retire rate of priority queue
+RETIRE_RATE := 3
+
#######################
# libarmnn-driver@1.0 #
#######################
@@ -486,6 +489,9 @@ LOCAL_CFLAGS += \
-DARMNNREF_ENABLED
endif # ARMNN_REF_ENABLED == 1
+LOCAL_CFLAGS += \
+ -DRETIRE_RATE=$(RETIRE_RATE)
+
LOCAL_SRC_FILES := \
1.0/ArmnnDriverImpl.cpp \
1.0/HalPolicy.cpp \
@@ -504,6 +510,7 @@ LOCAL_SRC_FILES := \
DriverOptions.cpp \
ModelToINetworkConverter.cpp \
RequestThread.cpp \
+ RequestThread_1_3.cpp \
Utils.cpp
LOCAL_STATIC_LIBRARIES := \
diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp
index 1fb03f44..c7adc6c4 100644
--- a/ArmnnPreparedModel_1_3.cpp
+++ b/ArmnnPreparedModel_1_3.cpp
@@ -138,7 +138,7 @@ namespace armnn_driver
{
template<typename HalVersion>
-RequestThread<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
+RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3>
ArmnnPreparedModel_1_3<HalVersion>::m_RequestThread;
template<typename HalVersion>
@@ -164,13 +164,15 @@ ArmnnPreparedModel_1_3<HalVersion>::ArmnnPreparedModel_1_3(armnn::NetworkId netw
armnn::IRuntime* runtime,
const V1_3::Model& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled)
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority)
: m_NetworkId(networkId)
, m_Runtime(runtime)
, m_Model(model)
, m_RequestCount(0)
, m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir)
, m_GpuProfilingEnabled(gpuProfilingEnabled)
+ , m_ModelPriority(priority)
{
// Enable profiling if required.
m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled);
@@ -830,6 +832,12 @@ Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<HalVersion>::Execute(const V1_
return V1_3::ErrorStatus::NONE;
}
+template<typename HalVersion>
+V1_3::Priority ArmnnPreparedModel_1_3<HalVersion>::GetModelPriority()
+{
+ return m_ModelPriority;
+}
+
#ifdef ARMNN_ANDROID_NN_V1_3
template class ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>;
template Return <V1_3::ErrorStatus> ArmnnPreparedModel_1_3<hal_1_3::HalPolicy>::ExecuteGraph<CallbackContext_1_3>(
diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp
index 4dd15c17..5010bbdb 100644
--- a/ArmnnPreparedModel_1_3.hpp
+++ b/ArmnnPreparedModel_1_3.hpp
@@ -7,7 +7,7 @@
#include "ArmnnDriver.hpp"
#include "ArmnnDriverImpl.hpp"
-#include "RequestThread.hpp"
+#include "RequestThread_1_3.hpp"
#include "ModelToINetworkConverter.hpp"
#include <NeuralNetworks.h>
@@ -50,7 +50,8 @@ public:
armnn::IRuntime* runtime,
const HalModel& model,
const std::string& requestInputsAndOutputsDumpDir,
- const bool gpuProfilingEnabled);
+ const bool gpuProfilingEnabled,
+ V1_3::Priority priority = V1_3::Priority::MEDIUM);
virtual ~ArmnnPreparedModel_1_3();
@@ -105,6 +106,8 @@ public:
/// \return false on failure, otherwise true
bool ExecuteWithDummyInputs();
+ V1_3::Priority GetModelPriority();
+
private:
Return <V1_3::ErrorStatus> Execute(const V1_3::Request& request,
MeasureTiming measureTiming,
@@ -135,10 +138,11 @@ private:
V1_3::Model m_Model;
// There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads
// It is specific to this class, so it is declared as static here
- static RequestThread<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
+ static RequestThread_1_3<ArmnnPreparedModel_1_3, HalVersion, CallbackContext_1_3> m_RequestThread;
uint32_t m_RequestCount;
const std::string& m_RequestInputsAndOutputsDumpDir;
const bool m_GpuProfilingEnabled;
+ V1_3::Priority m_ModelPriority;
};
}
diff --git a/RequestThread.cpp b/RequestThread.cpp
index a177b1a4..927af922 100644
--- a/RequestThread.cpp
+++ b/RequestThread.cpp
@@ -161,7 +161,6 @@ template class RequestThread<ArmnnPreparedModel, hal_1_1::HalPolicy, CallbackCon
template class RequestThread<ArmnnPreparedModel, hal_1_2::HalPolicy, CallbackContext_1_0>;
template class RequestThread<ArmnnPreparedModel, hal_1_3::HalPolicy, CallbackContext_1_0>;
template class RequestThread<ArmnnPreparedModel_1_2, hal_1_2::HalPolicy, CallbackContext_1_2>;
-template class RequestThread<ArmnnPreparedModel_1_3, hal_1_3::HalPolicy, CallbackContext_1_3>;
#endif
} // namespace armnn_driver
diff --git a/RequestThread_1_3.cpp b/RequestThread_1_3.cpp
new file mode 100644
index 00000000..59fa70ed
--- /dev/null
+++ b/RequestThread_1_3.cpp
@@ -0,0 +1,193 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#define LOG_TAG "ArmnnDriver"
+
+#include "RequestThread_1_3.hpp"
+
+#include "ArmnnPreparedModel_1_3.hpp"
+
+#include <armnn/utility/Assert.hpp>
+
+#include <log/log.h>
+
+using namespace android;
+
+namespace armnn_driver
+{
+
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::RequestThread_1_3()
+{
+ ALOGV("RequestThread_1_3::RequestThread_1_3()");
+ m_Thread = std::make_unique<std::thread>(&RequestThread_1_3::Process, this);
+}
+
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::~RequestThread_1_3()
+{
+ ALOGV("RequestThread_1_3::~RequestThread_1_3()");
+
+ try
+ {
+ // Coverity fix: The following code may throw an exception of type std::length_error.
+
+        // This code is meant to terminate the inner thread gracefully by posting an EXIT message
+ // to the thread's message queue. However, according to Coverity, this code could throw an exception and fail.
+ // Since only one static instance of RequestThread is used in the driver (in ArmnnPreparedModel),
+ // this destructor is called only when the application has been closed, which means that
+ // the inner thread will be terminated anyway, although abruptly, in the event that the destructor code throws.
+ // Wrapping the destructor's code with a try-catch block simply fixes the Coverity bug.
+
+ // Post an EXIT message to the thread
+ std::shared_ptr<AsyncExecuteData> nulldata(nullptr);
+ auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::EXIT, nulldata);
+ PostMsg(pMsg);
+ // Wait for the thread to terminate, it is deleted automatically
+ m_Thread->join();
+ }
+ catch (const std::exception&) { } // Swallow any exception.
+}
+
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::PostMsg(PreparedModel<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+{
+ ALOGV("RequestThread_1_3::PostMsg(...)");
+ auto data = std::make_shared<AsyncExecuteData>(model,
+ memPools,
+ inputTensors,
+ outputTensors,
+ callbackContext);
+ auto pMsg = std::make_shared<ThreadMsg>(ThreadMsgType::REQUEST, data);
+ PostMsg(pMsg, model->GetModelPriority());
+}
+
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::PostMsg(std::shared_ptr<ThreadMsg>& pMsg,
+ V1_3::Priority priority)
+{
+ ALOGV("RequestThread_1_3::PostMsg(pMsg)");
+ // Add a message to the queue and notify the request thread
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ switch (priority) {
+ case V1_3::Priority::HIGH:
+ m_HighPriorityQueue.push(pMsg);
+ break;
+ case V1_3::Priority::LOW:
+ m_LowPriorityQueue.push(pMsg);
+ break;
+ case V1_3::Priority::MEDIUM:
+ default:
+ m_MediumPriorityQueue.push(pMsg);
+ }
+ m_Cv.notify_one();
+}
+
+template <template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+void RequestThread_1_3<PreparedModel, HalVersion, CallbackContext>::Process()
+{
+ ALOGV("RequestThread_1_3::Process()");
+ int retireRate = RETIRE_RATE;
+ int highPriorityCount = 0;
+ int mediumPriorityCount = 0;
+ while (true)
+ {
+ std::shared_ptr<ThreadMsg> pMsg(nullptr);
+ {
+ // Wait for a message to be added to the queue
+ // This is in a separate scope to minimise the lifetime of the lock
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ while (m_HighPriorityQueue.empty() && m_MediumPriorityQueue.empty() && m_LowPriorityQueue.empty())
+ {
+ m_Cv.wait(lock);
+ }
+ // Get the message to process from the front of each queue based on priority from high to low
+ // Get high priority first if it does not exceed the retire rate
+ if (!m_HighPriorityQueue.empty() && highPriorityCount < retireRate)
+ {
+ pMsg = m_HighPriorityQueue.front();
+ m_HighPriorityQueue.pop();
+ highPriorityCount += 1;
+ }
+ // If high priority queue is empty or the count exceeds the retire rate, get medium priority message
+ else if (!m_MediumPriorityQueue.empty() && mediumPriorityCount < retireRate)
+ {
+ pMsg = m_MediumPriorityQueue.front();
+ m_MediumPriorityQueue.pop();
+ mediumPriorityCount += 1;
+ // Reset high priority count
+ highPriorityCount = 0;
+ }
+ // If medium priority queue is empty or the count exceeds the retire rate, get low priority message
+ else if (!m_LowPriorityQueue.empty())
+ {
+ pMsg = m_LowPriorityQueue.front();
+ m_LowPriorityQueue.pop();
+ // Reset high and medium priority count
+ highPriorityCount = 0;
+ mediumPriorityCount = 0;
+ }
+ else
+ {
+ // Reset high and medium priority count
+ highPriorityCount = 0;
+ mediumPriorityCount = 0;
+ continue;
+ }
+ }
+
+ switch (pMsg->type)
+ {
+ case ThreadMsgType::REQUEST:
+ {
+ ALOGV("RequestThread_1_3::Process() - request");
+ // invoke the asynchronous execution method
+ PreparedModel<HalVersion>* model = pMsg->data->m_Model;
+ model->ExecuteGraph(pMsg->data->m_MemPools,
+ *(pMsg->data->m_InputTensors),
+ *(pMsg->data->m_OutputTensors),
+ pMsg->data->m_CallbackContext);
+ break;
+ }
+
+ case ThreadMsgType::EXIT:
+ {
+ ALOGV("RequestThread_1_3::Process() - exit");
+ // delete all remaining messages (there should not be any)
+ std::unique_lock<std::mutex> lock(m_Mutex);
+ while (!m_HighPriorityQueue.empty())
+ {
+ m_HighPriorityQueue.pop();
+ }
+ while (!m_MediumPriorityQueue.empty())
+ {
+ m_MediumPriorityQueue.pop();
+ }
+ while (!m_LowPriorityQueue.empty())
+ {
+ m_LowPriorityQueue.pop();
+ }
+ return;
+ }
+
+ default:
+ // this should be unreachable
+ ALOGE("RequestThread_1_3::Process() - invalid message type");
+ ARMNN_ASSERT_MSG(false, "ArmNN: RequestThread_1_3: invalid message type");
+ }
+ }
+}
+
+///
+/// Class template specializations
+///
+
+template class RequestThread_1_3<ArmnnPreparedModel_1_3, hal_1_3::HalPolicy, CallbackContext_1_3>;
+
+} // namespace armnn_driver
diff --git a/RequestThread_1_3.hpp b/RequestThread_1_3.hpp
new file mode 100644
index 00000000..c8abc5e5
--- /dev/null
+++ b/RequestThread_1_3.hpp
@@ -0,0 +1,106 @@
+//
+// Copyright © 2020 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <queue>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+
+#include "ArmnnDriver.hpp"
+#include "ArmnnDriverImpl.hpp"
+
+#include <CpuExecutor.h>
+#include <armnn/ArmNN.hpp>
+
+namespace armnn_driver
+{
+using TimePoint = std::chrono::steady_clock::time_point;
+
+template<template <typename HalVersion> class PreparedModel, typename HalVersion, typename CallbackContext>
+class RequestThread_1_3
+{
+public:
+ /// Constructor creates the thread
+ RequestThread_1_3();
+
+ /// Destructor terminates the thread
+ ~RequestThread_1_3();
+
+ /// Add a message to the thread queue.
+ /// @param[in] model pointer to the prepared model handling the request
+ /// @param[in] memPools pointer to the memory pools vector for the tensors
+ /// @param[in] inputTensors pointer to the input tensors for the request
+ /// @param[in] outputTensors pointer to the output tensors for the request
+ /// @param[in] callback the android notification callback
+ void PostMsg(PreparedModel<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext);
+
+private:
+ RequestThread_1_3(const RequestThread_1_3&) = delete;
+ RequestThread_1_3& operator=(const RequestThread_1_3&) = delete;
+
+ /// storage for a prepared model and args for the asyncExecute call
+ struct AsyncExecuteData
+ {
+ AsyncExecuteData(PreparedModel<HalVersion>* model,
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>>& memPools,
+ std::shared_ptr<armnn::InputTensors>& inputTensors,
+ std::shared_ptr<armnn::OutputTensors>& outputTensors,
+ CallbackContext callbackContext)
+ : m_Model(model)
+ , m_MemPools(memPools)
+ , m_InputTensors(inputTensors)
+ , m_OutputTensors(outputTensors)
+ , m_CallbackContext(callbackContext)
+ {
+ }
+
+ PreparedModel<HalVersion>* m_Model;
+ std::shared_ptr<std::vector<::android::nn::RunTimePoolInfo>> m_MemPools;
+ std::shared_ptr<armnn::InputTensors> m_InputTensors;
+ std::shared_ptr<armnn::OutputTensors> m_OutputTensors;
+ CallbackContext m_CallbackContext;
+ };
+ enum class ThreadMsgType
+ {
+ EXIT, // exit the thread
+ REQUEST // user request to process
+ };
+
+ /// storage for the thread message type and data
+ struct ThreadMsg
+ {
+ ThreadMsg(ThreadMsgType msgType,
+ std::shared_ptr<AsyncExecuteData>& msgData)
+ : type(msgType)
+ , data(msgData)
+ {
+ }
+
+ ThreadMsgType type;
+ std::shared_ptr<AsyncExecuteData> data;
+ };
+
+ /// Add a prepared thread message to the thread queue.
+ /// @param[in] threadMsg the message to add to the queue
+ void PostMsg(std::shared_ptr<ThreadMsg>& pThreadMsg, V1_3::Priority priority = V1_3::Priority::MEDIUM);
+
+ /// Entry point for the request thread
+ void Process();
+
+ std::unique_ptr<std::thread> m_Thread;
+ std::queue<std::shared_ptr<ThreadMsg>> m_HighPriorityQueue;
+ std::queue<std::shared_ptr<ThreadMsg>> m_MediumPriorityQueue;
+ std::queue<std::shared_ptr<ThreadMsg>> m_LowPriorityQueue;
+ std::mutex m_Mutex;
+ std::condition_variable m_Cv;
+};
+
+} // namespace armnn_driver