From cad4e91027a29a62c210d422ce1c9130e46f2199 Mon Sep 17 00:00:00 2001 From: Narumol Prangnawarat Date: Tue, 2 Jun 2020 12:07:43 +0100 Subject: IVGCVSW-4780 Add QoS to AndroidNNDriver * Add model priority to ArmnnPreparedModel_1_3 * Add RequestThread_1_3 to allow execution based on priority * Add RETIRE_RATE to Android.mk to be able to configure the retire rate Signed-off-by: Narumol Prangnawarat Change-Id: Ic5f4309249b744c2a8f625c986eede381a26028b --- 1.3/ArmnnDriver.hpp | 3 +- 1.3/ArmnnDriverImpl.cpp | 6 +- 1.3/ArmnnDriverImpl.hpp | 3 +- Android.mk | 7 ++ ArmnnPreparedModel_1_3.cpp | 12 ++- ArmnnPreparedModel_1_3.hpp | 10 ++- RequestThread.cpp | 1 - RequestThread_1_3.cpp | 193 +++++++++++++++++++++++++++++++++++++++++++++ RequestThread_1_3.hpp | 106 +++++++++++++++++++++++++ 9 files changed, 331 insertions(+), 10 deletions(-) create mode 100644 RequestThread_1_3.cpp create mode 100644 RequestThread_1_3.hpp diff --git a/1.3/ArmnnDriver.hpp b/1.3/ArmnnDriver.hpp index 798c4381..b6b55fae 100644 --- a/1.3/ArmnnDriver.hpp +++ b/1.3/ArmnnDriver.hpp @@ -206,7 +206,8 @@ public: model, cb, model.relaxComputationFloat32toFloat16 - && m_Options.GetFp16Enabled()); + && m_Options.GetFp16Enabled(), + priority); } Return getSupportedExtensions(getSupportedExtensions_cb cb) diff --git a/1.3/ArmnnDriverImpl.cpp b/1.3/ArmnnDriverImpl.cpp index 4b2ff148..6168c9d0 100644 --- a/1.3/ArmnnDriverImpl.cpp +++ b/1.3/ArmnnDriverImpl.cpp @@ -101,7 +101,8 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( const DriverOptions& options, const V1_3::Model& model, const sp& cb, - bool float32ToFloat16) + bool float32ToFloat16, + V1_3::Priority priority) { ALOGV("ArmnnDriverImpl::prepareArmnnModel_1_3()"); @@ -204,7 +205,8 @@ Return ArmnnDriverImpl::prepareArmnnModel_1_3( runtime.get(), model, options.GetRequestInputsAndOutputsDumpDir(), - options.IsGpuProfilingEnabled())); + options.IsGpuProfilingEnabled(), + priority)); // Run a single 'dummy' inference of the model. This means that CL kernels will get compiled (and tuned if // this is enabled) before the first 'real' inference which removes the overhead of the first inference. diff --git a/1.3/ArmnnDriverImpl.hpp b/1.3/ArmnnDriverImpl.hpp index 8a665ea5..2b39d4e0 100644 --- a/1.3/ArmnnDriverImpl.hpp +++ b/1.3/ArmnnDriverImpl.hpp @@ -30,7 +30,8 @@ public: const DriverOptions& options, const V1_3::Model& model, const android::sp& cb, - bool float32ToFloat16 = false); + bool float32ToFloat16 = false, + V1_3::Priority priority = V1_3::Priority::MEDIUM); static Return getCapabilities_1_3(const armnn::IRuntimePtr& runtime, V1_3::IDevice::getCapabilities_1_3_cb cb); diff --git a/Android.mk b/Android.mk index bf8bc65b..cf37efcd 100644 --- a/Android.mk +++ b/Android.mk @@ -72,6 +72,9 @@ ifeq ($(ARMNN_LIBOPENCL),0) ARMNN_INCLUDE_LIBOPENCL := 0 endif +# Variable to control retire rate of priority queue +RETIRE_RATE := 3 + ####################### # libarmnn-driver@1.0 # ####################### @@ -486,6 +489,9 @@ LOCAL_CFLAGS += \ -DARMNNREF_ENABLED endif # ARMNN_REF_ENABLED == 1 +LOCAL_CFLAGS += \ + -DRETIRE_RATE=$(RETIRE_RATE) + LOCAL_SRC_FILES := \ 1.0/ArmnnDriverImpl.cpp \ 1.0/HalPolicy.cpp \ @@ -504,6 +510,7 @@ LOCAL_SRC_FILES := \ DriverOptions.cpp \ ModelToINetworkConverter.cpp \ RequestThread.cpp \ + RequestThread_1_3.cpp \ Utils.cpp LOCAL_STATIC_LIBRARIES := \ diff --git a/ArmnnPreparedModel_1_3.cpp b/ArmnnPreparedModel_1_3.cpp index 1fb03f44..c7adc6c4 100644 --- a/ArmnnPreparedModel_1_3.cpp +++ b/ArmnnPreparedModel_1_3.cpp @@ -138,7 +138,7 @@ namespace armnn_driver { template -RequestThread +RequestThread_1_3 ArmnnPreparedModel_1_3::m_RequestThread; template @@ -164,13 +164,15 @@ ArmnnPreparedModel_1_3::ArmnnPreparedModel_1_3(armnn::NetworkId netw armnn::IRuntime* runtime, const V1_3::Model& model, const std::string& requestInputsAndOutputsDumpDir, - const bool gpuProfilingEnabled) + const bool gpuProfilingEnabled, + V1_3::Priority priority) : m_NetworkId(networkId) , m_Runtime(runtime) , m_Model(model) , m_RequestCount(0) , m_RequestInputsAndOutputsDumpDir(requestInputsAndOutputsDumpDir) , m_GpuProfilingEnabled(gpuProfilingEnabled) + , m_ModelPriority(priority) { // Enable profiling if required. m_Runtime->GetProfiler(m_NetworkId)->EnableProfiling(m_GpuProfilingEnabled); @@ -830,6 +832,12 @@ Return ArmnnPreparedModel_1_3::Execute(const V1_ return V1_3::ErrorStatus::NONE; } +template +V1_3::Priority ArmnnPreparedModel_1_3::GetModelPriority() +{ + return m_ModelPriority; +} + #ifdef ARMNN_ANDROID_NN_V1_3 template class ArmnnPreparedModel_1_3; template Return ArmnnPreparedModel_1_3::ExecuteGraph( diff --git a/ArmnnPreparedModel_1_3.hpp b/ArmnnPreparedModel_1_3.hpp index 4dd15c17..5010bbdb 100644 --- a/ArmnnPreparedModel_1_3.hpp +++ b/ArmnnPreparedModel_1_3.hpp @@ -7,7 +7,7 @@ #include "ArmnnDriver.hpp" #include "ArmnnDriverImpl.hpp" -#include "RequestThread.hpp" +#include "RequestThread_1_3.hpp" #include "ModelToINetworkConverter.hpp" #include @@ -50,7 +50,8 @@ public: armnn::IRuntime* runtime, const HalModel& model, const std::string& requestInputsAndOutputsDumpDir, - const bool gpuProfilingEnabled); + const bool gpuProfilingEnabled, + V1_3::Priority priority = V1_3::Priority::MEDIUM); virtual ~ArmnnPreparedModel_1_3(); @@ -105,6 +106,8 @@ public: /// \return false on failure, otherwise true bool ExecuteWithDummyInputs(); + V1_3::Priority GetModelPriority(); + private: Return Execute(const V1_3::Request& request, MeasureTiming measureTiming, @@ -135,10 +138,11 @@ private: V1_3::Model m_Model; // There must be a single RequestThread for all ArmnnPreparedModel objects to ensure serial execution of workloads // It is specific to this class, so it is declared as static here - static RequestThread m_RequestThread; + static RequestThread_1_3 m_RequestThread; uint32_t m_RequestCount; const std::string& m_RequestInputsAndOutputsDumpDir; const bool m_GpuProfilingEnabled; + V1_3::Priority m_ModelPriority; }; } diff --git a/RequestThread.cpp b/RequestThread.cpp index a177b1a4..927af922 100644 --- a/RequestThread.cpp +++ b/RequestThread.cpp @@ -161,7 +161,6 @@ template class RequestThread; template class RequestThread; template class RequestThread; -template class RequestThread; #endif } // namespace armnn_driver diff --git a/RequestThread_1_3.cpp b/RequestThread_1_3.cpp new file mode 100644 index 00000000..59fa70ed --- /dev/null +++ b/RequestThread_1_3.cpp @@ -0,0 +1,193 @@ +// +// Copyright © 2020 Arm Ltd. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#define LOG_TAG "ArmnnDriver" + +#include "RequestThread_1_3.hpp" + +#include "ArmnnPreparedModel_1_3.hpp" + +#include + +#include + +using namespace android; + +namespace armnn_driver +{ + +template