From 406463269f55a5baefb941b51e10f423f6d3250a Mon Sep 17 00:00:00 2001 From: Finn Williams Date: Thu, 11 Feb 2021 16:16:42 +0000 Subject: IVGCVSW-5686 Add GpuAcc MLGO tuning file configuration argument Signed-off-by: Finn Williams Change-Id: I3f320499c379162f9d1b00cc8816bd144cd7eee4 --- delegate/src/armnn_external_delegate.cpp | 5 + src/backends/cl/ClBackendContext.cpp | 28 ++++- src/backends/cl/ClBackendContext.hpp | 5 + src/backends/cl/ClContextControl.cpp | 4 +- src/backends/cl/ClContextControl.hpp | 4 + src/backends/cl/test/ClContextControlFixture.hpp | 2 +- src/backends/cl/test/ClOptimizedNetworkTests.cpp | 113 +++++++++++++++++++++ src/backends/cl/test/OpenClTimerTest.cpp | 2 +- tests/ExecuteNetwork/ExecuteNetwork.cpp | 1 + tests/ExecuteNetwork/ExecuteNetworkParams.hpp | 1 + .../ExecuteNetworkProgramOptions.cpp | 9 +- tests/InferenceModel.hpp | 7 +- 12 files changed, 173 insertions(+), 8 deletions(-) diff --git a/delegate/src/armnn_external_delegate.cpp b/delegate/src/armnn_external_delegate.cpp index 4dba07d3b8..edf46efb98 100644 --- a/delegate/src/armnn_external_delegate.cpp +++ b/delegate/src/armnn_external_delegate.cpp @@ -125,6 +125,11 @@ TfLiteDelegate* tflite_plugin_create_delegate(char** options_keys, armnn::BackendOptions option("GpuAcc", {{"TuningLevel", atoi(options_values[i])}}); options.AddBackendOption(option); } + else if (std::string(options_keys[i]) == std::string("gpu-mlgo-tuning-file")) + { + armnn::BackendOptions option("GpuAcc", {{"MLGOTuningFilePath", std::string(options_values[i])}}); + options.AddBackendOption(option); + } else if (std::string(options_keys[i]) == std::string("gpu-tuning-file")) { armnn::BackendOptions option("GpuAcc", {{"TuningFile", std::string(options_values[i])}}); diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp index 125f01b627..9c5cca9d3a 100644 --- a/src/backends/cl/ClBackendContext.cpp +++ b/src/backends/cl/ClBackendContext.cpp @@ -21,8 +21,9 @@ namespace armnn struct 
ClBackendContext::ClContextControlWrapper { ClContextControlWrapper(arm_compute::CLTuner* tuner, + arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, bool profilingEnabled) - : m_ClContextControl(tuner, profilingEnabled) + : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled) {} bool Sync() @@ -143,6 +144,7 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) bool kernelProfiling = options.m_EnableGpuProfiling; arm_compute::CLTuner* tuner = nullptr; + arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr; bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr; if (useLegacyTunerAPI) { @@ -197,6 +199,10 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) { tuningLevel = ParseTuningLevel(value, defaultTuningLevel); } + else if (name == "MLGOTuningFilePath") + { + m_MLGOTuningFile = ParseFile(value, ""); + } }); // Create the tuner, in tuning mode initially. @@ -216,13 +222,31 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) ARMNN_LOG(warning) << "Could not load GpuAcc tuner data file."; } } + + if (!m_MLGOTuningFile.empty()) + { + try + { + ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_MLGOTuningFile; + if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str())) + { + mlgoTuner = &m_MLGOTuner; + } + } + catch (const std::exception& e) + { + ARMNN_LOG(warning) << "Could not load GpuAcc MLGO tuner data file."; + } + } + tuner = m_Tuner.get(); } m_ClContextControlWrapper = std::make_unique( tuner, + mlgoTuner, kernelProfiling - ); + ); } bool ClBackendContext::BeforeLoadNetwork(NetworkId) diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp index bcac0d245e..af988a96dc 100644 --- a/src/backends/cl/ClBackendContext.hpp +++ b/src/backends/cl/ClBackendContext.hpp @@ -9,6 +9,7 @@ #include #include +#include namespace armnn { @@ -35,6 +36,10 @@ private: std::unique_ptr m_Tuner; std::string m_TuningFile; 
+ +protected: + arm_compute::CLGEMMHeuristicsHandle m_MLGOTuner; + std::string m_MLGOTuningFile; }; } // namespace armnn \ No newline at end of file diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp index 7ab825f59e..fd2d0f53eb 100644 --- a/src/backends/cl/ClContextControl.cpp +++ b/src/backends/cl/ClContextControl.cpp @@ -28,8 +28,10 @@ namespace armnn { ClContextControl::ClContextControl(arm_compute::CLTuner *tuner, + arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, bool profilingEnabled) : m_Tuner(tuner) + , m_HeuristicsHandle(heuristicsHandle) , m_ProfilingEnabled(profilingEnabled) { // Ignore m_ProfilingEnabled if unused to avoid compiling problems when ArmCompute is disabled. @@ -156,7 +158,7 @@ void ClContextControl::DoLoadOpenClRuntime(bool updateTunedParameters) // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute. arm_compute::CLKernelLibrary::get().init(".", context, device); - arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner); + arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle); } void ClContextControl::ClearClCache() diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp index 2ed43bc60b..4a640cdf22 100644 --- a/src/backends/cl/ClContextControl.hpp +++ b/src/backends/cl/ClContextControl.hpp @@ -7,6 +7,7 @@ #include "armnn/IRuntime.hpp" #include +#include namespace armnn { @@ -17,6 +18,7 @@ class ClContextControl public: ClContextControl(arm_compute::CLTuner* = nullptr, + arm_compute::CLGEMMHeuristicsHandle* = nullptr, bool profilingEnabled = false); virtual ~ClContextControl(); @@ -35,6 +37,7 @@ private: void DoLoadOpenClRuntime(bool updateTunedParameters); arm_compute::CLTuner* m_Tuner; + arm_compute::CLGEMMHeuristicsHandle* m_HeuristicsHandle; bool m_ProfilingEnabled; }; @@ -51,6 +54,7 @@ public: TuningLevel m_TuningLevel; 
arm_compute::CLTuner m_Tuner; + arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle; }; } // namespace armnn diff --git a/src/backends/cl/test/ClContextControlFixture.hpp b/src/backends/cl/test/ClContextControlFixture.hpp index 0371c69a0b..14970be139 100644 --- a/src/backends/cl/test/ClContextControlFixture.hpp +++ b/src/backends/cl/test/ClContextControlFixture.hpp @@ -13,7 +13,7 @@ struct ClContextControlFixtureBase { // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case ClContextControlFixtureBase() - : m_ClContextControl(nullptr, ProfilingEnabled) {} + : m_ClContextControl(nullptr, nullptr, ProfilingEnabled) {} armnn::ClContextControl m_ClContextControl; }; diff --git a/src/backends/cl/test/ClOptimizedNetworkTests.cpp b/src/backends/cl/test/ClOptimizedNetworkTests.cpp index 2797080360..dddc5aa8bc 100644 --- a/src/backends/cl/test/ClOptimizedNetworkTests.cpp +++ b/src/backends/cl/test/ClOptimizedNetworkTests.cpp @@ -10,6 +10,10 @@ #include #include +#include + +#include + #include @@ -130,4 +134,113 @@ BOOST_AUTO_TEST_CASE(FastMathEnabledTestOnGpuAcc) BOOST_TEST(modelOptionsOut[0].GetOption(0).GetValue().AsBool() == true); } +BOOST_AUTO_TEST_CASE(CheckMLGOTuningFile) +{ + class ClBackendContextTestClass : public armnn::ClBackendContext + { + public: + ClBackendContextTestClass(const armnn::IRuntime::CreationOptions &options) : ClBackendContext(options) + {} + + bool call_reload_from_file() + { + return m_MLGOTuner.reload_from_file(m_MLGOTuningFile); + } + }; + + const std::string validText{ + "
\n" + "gemm-version, [1,2,1]\n" + "ip-type,gpu\n" + "
\n" + "\n" + "0, g71 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]\n" + "1, g71 , 8, f32, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n" + "2, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]\n" + "3, g71 , 8, qasymm8, best-performance, static, gemm-type, [m,n,k,n]\n" + "4, g71 , 8, qasymm8, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]\n" + "5, g71 , 8, qasymm8, best-performance, static, gemm-config-native, [m,n,k,n]\n" + "\n" + "\n" + "b , 0, var, r_mn, >=, num, 2., 1, 2\n" + "l , 1, gemm-type, reshaped\n" + "l , 2, gemm-type, reshaped-only-rhs\n" + "\n" + "\n" + "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n" + "\n" + "\n" + "l ,0,gemm-config-reshaped,[4,2,8,16,16,1,0,1,0]\n" + "\n" + "\n" + "l , 0, gemm-type, native\n" + "\n" + "\n" + "l ,0,gemm-config-reshaped-only-rhs, [2, 4,4,4,1,1,0]\n" + "\n" + "\n" + "l ,0,gemm-config-native,[4,2,8]\n" + "\n"}; + + const std::string invalidText{"ʕノ•ᴥ•ʔノ ︵ ┻━┻"}; + + fs::path validFile = armnnUtils::Filesystem::NamedTempFile("validFile.mlgo"); + fs::path invalidFile = armnnUtils::Filesystem::NamedTempFile("invalidFile.mlgo"); + + try + { + std::ofstream ofs1{validFile}; + ofs1 << validText << std::endl; + ofs1.close(); + + std::ofstream ofs2{invalidFile}; + ofs2 << invalidText << std::endl; + ofs2.close(); + } + catch (std::exception &e) + { + std::cerr << "Unable to write to file at location [" << validFile.c_str() << "] : " << e.what() << std::endl; + BOOST_TEST(false); + } + + armnn::IRuntime::CreationOptions creationOptions1; + armnn::BackendOptions validOptions + { + "GpuAcc", + { + {"MLGOTuningFilePath", validFile.c_str()} + } + }; + + creationOptions1.m_BackendOptions.emplace_back(validOptions); + ClBackendContextTestClass clBackendContext1(creationOptions1); + BOOST_TEST(clBackendContext1.call_reload_from_file()); + + armnn::BackendOptions invalidOptions + { + "GpuAcc", + { + {"MLGOTuningFilePath", invalidFile.c_str()} + } + }; + 
+ armnn::IRuntime::CreationOptions creationOptions2; + creationOptions2.m_BackendOptions.emplace_back(invalidOptions); + ClBackendContextTestClass clBackendContext2(creationOptions2); + BOOST_TEST(clBackendContext2.call_reload_from_file() == false); + + armnn::BackendOptions invalidPathOptions + { + "GpuAcc", + { + {"MLGOTuningFilePath", "not_a_real_file_path"} + } + }; + + armnn::IRuntime::CreationOptions creationOptions3; + creationOptions3.m_BackendOptions.emplace_back(invalidPathOptions); + ClBackendContextTestClass clBackendContext3(creationOptions3); + BOOST_TEST(clBackendContext3.call_reload_from_file() == false); +} + BOOST_AUTO_TEST_SUITE_END(); diff --git a/src/backends/cl/test/OpenClTimerTest.cpp b/src/backends/cl/test/OpenClTimerTest.cpp index 68a356a5d8..0e1f28ec4e 100644 --- a/src/backends/cl/test/OpenClTimerTest.cpp +++ b/src/backends/cl/test/OpenClTimerTest.cpp @@ -32,7 +32,7 @@ struct OpenClFixture // Initialising ClContextControl to ensure OpenCL is loaded correctly for each test case. // NOTE: Profiling needs to be enabled in ClContextControl to be able to obtain execution // times from OpenClTimer. 
- OpenClFixture() : m_ClContextControl(nullptr, true) {} + OpenClFixture() : m_ClContextControl(nullptr, nullptr, true) {} ~OpenClFixture() {} ClContextControl m_ClContextControl; diff --git a/tests/ExecuteNetwork/ExecuteNetwork.cpp b/tests/ExecuteNetwork/ExecuteNetwork.cpp index c19f519c73..e3ca22e0ff 100644 --- a/tests/ExecuteNetwork/ExecuteNetwork.cpp +++ b/tests/ExecuteNetwork/ExecuteNetwork.cpp @@ -309,6 +309,7 @@ int MainImpl(const ExecuteNetworkParams& params, inferenceModelParams.m_SaveCachedNetwork = params.m_SaveCachedNetwork; inferenceModelParams.m_CachedNetworkFilePath = params.m_CachedNetworkFilePath; inferenceModelParams.m_NumberOfThreads = params.m_NumberOfThreads; + inferenceModelParams.m_MLGOTuningFilePath = params.m_MLGOTuningFilePath; for(const std::string& inputName: params.m_InputNames) { diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp index 830270adbc..a30ce57147 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkParams.hpp +++ b/tests/ExecuteNetwork/ExecuteNetworkParams.hpp @@ -46,6 +46,7 @@ struct ExecuteNetworkParams double m_ThresholdTime; int m_TuningLevel; std::string m_TuningPath; + std::string m_MLGOTuningFilePath; // Ensures that the parameters for ExecuteNetwork fit together void ValidateParams(); diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp index 73da1f1d1d..0eaf8da890 100644 --- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp +++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp @@ -310,7 +310,11 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork", ("tuning-path", "Path to tuning file. Enables use of CL tuning", - cxxopts::value(m_ExNetParams.m_TuningPath)); + cxxopts::value(m_ExNetParams.m_TuningPath)) + + ("MLGOTuningFilePath", + "Path to tuning file. 
Enables use of CL MLGO tuning", + cxxopts::value(m_ExNetParams.m_MLGOTuningFilePath)); m_CxxOptions.add_options("d) Profiling") ("a,enable-external-profiling", @@ -427,7 +431,8 @@ void ProgramOptions::ParseOptions(int ac, const char* av[]) { {"TuningLevel", m_ExNetParams.m_TuningLevel}, {"TuningFile", m_ExNetParams.m_TuningPath.c_str()}, - {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling} + {"KernelProfilingEnabled", m_ExNetParams.m_EnableProfiling}, + {"MLGOTuningFilePath", m_ExNetParams.m_MLGOTuningFilePath} } } ); diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp index d20bb2271f..79962623cb 100644 --- a/tests/InferenceModel.hpp +++ b/tests/InferenceModel.hpp @@ -100,6 +100,8 @@ struct Params bool m_SaveCachedNetwork; std::string m_CachedNetworkFilePath; unsigned int m_NumberOfThreads; + std::string m_MLGOTuningFilePath; + Params() : m_ComputeDevices{} @@ -115,6 +117,7 @@ struct Params , m_SaveCachedNetwork(false) , m_CachedNetworkFilePath("") , m_NumberOfThreads(0) + , m_MLGOTuningFilePath("") {} }; @@ -434,8 +437,10 @@ public: { { "FastMathEnabled", params.m_EnableFastMath }, { "SaveCachedNetwork", params.m_SaveCachedNetwork }, - { "CachedNetworkFilePath", params.m_CachedNetworkFilePath } + { "CachedNetworkFilePath", params.m_CachedNetworkFilePath }, + { "MLGOTuningFilePath", params.m_MLGOTuningFilePath } }); + armnn::BackendOptions cpuAcc("CpuAcc", { { "FastMathEnabled", params.m_EnableFastMath }, -- cgit v1.2.1