aboutsummaryrefslogtreecommitdiff
path: root/src/backends/gpuFsa/GpuFsaBackendContext.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackendContext.cpp')
-rw-r--r--src/backends/gpuFsa/GpuFsaBackendContext.cpp230
1 files changed, 230 insertions, 0 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackendContext.cpp b/src/backends/gpuFsa/GpuFsaBackendContext.cpp
new file mode 100644
index 0000000000..72b77e0d19
--- /dev/null
+++ b/src/backends/gpuFsa/GpuFsaBackendContext.cpp
@@ -0,0 +1,230 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GpuFsaBackendContext.hpp"
+#include "GpuFsaContextControl.hpp"
+
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+namespace armnn
+{
+
+struct GpuFsaBackendContext::GpuFsaContextControlWrapper
+{
+ GpuFsaContextControlWrapper(arm_compute::CLTuner* tuner,
+ arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
+ bool profilingEnabled)
+ : m_GpuFsaContextControl(tuner, heuristicsHandle, profilingEnabled)
+ {}
+
+ bool Sync()
+ {
+ if (arm_compute::CLScheduler::get().context()() != NULL)
+ {
+ // Waits for all queued CL requests to finish before unloading the network they may be using.
+ try
+ {
+ // Coverity fix: arm_compute::CLScheduler::sync() may throw an exception of type cl::Error.
+ arm_compute::CLScheduler::get().sync();
+ }
+ catch (const cl::Error&)
+ {
+ ARMNN_LOG(warning) << "Runtime::UnloadNetwork(): an error occurred while waiting for "
+ "the queued CL requests to finish";
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ void ClearClCache()
+ {
+ if (arm_compute::CLScheduler::get().context()() != NULL)
+ {
+ // There are no loaded networks left, so clear the CL cache to free up memory
+ m_GpuFsaContextControl.ClearClCache();
+ }
+ }
+
+ GpuFsaContextControl m_GpuFsaContextControl;
+};
+
+GpuFsaBackendContext::GpuFsaBackendContext(const IRuntime::CreationOptions& options)
+ : IBackendContext(options)
+ , m_TuningFile()
+{
+ bool kernelProfiling = options.m_EnableGpuProfiling;
+
+ arm_compute::CLTuner* tuner = nullptr;
+ arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
+ bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
+ if (useLegacyTunerAPI)
+ {
+ auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
+ options.m_GpuAccTunedParameters.get());
+ tuner = &clTunerParams->m_Tuner;
+
+ if (tuner)
+ {
+ auto ConvertTuningLevel = [](IGpuAccTunedParameters::TuningLevel level,
+ armnn::IGpuAccTunedParameters::Mode mode)
+ {
+ if (mode == armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+ {
+ return TuningLevel::None;
+ }
+
+ switch(level)
+ {
+ case IGpuAccTunedParameters::TuningLevel::Rapid:
+ return TuningLevel::Rapid;
+ case IGpuAccTunedParameters::TuningLevel::Normal:
+ return TuningLevel::Normal;
+ case IGpuAccTunedParameters::TuningLevel::Exhaustive:
+ return TuningLevel::Exhaustive;
+ default:
+ {
+ ARMNN_ASSERT_MSG(false, "Tuning level not recognised.");
+ return TuningLevel::None;
+ }
+ }
+ };
+
+ TuningLevel tuningLevel = ConvertTuningLevel(clTunerParams->m_TuningLevel, clTunerParams->m_Mode);
+ ConfigureTuner(*tuner, tuningLevel);
+ }
+ }
+ else //New backend options API
+ {
+ const TuningLevel defaultTuningLevel = TuningLevel::None;
+ auto tuningLevel = defaultTuningLevel;
+
+ ParseOptions(options.m_BackendOptions, "GpuFsa", [&](std::string name, const BackendOptions::Var& value)
+ {
+ if (name == "KernelProfilingEnabled")
+ {
+ kernelProfiling |= ParseBooleanBackendOption(value, false);
+ } else if (name == "TuningFile")
+ {
+ m_TuningFile = ParseStringBackendOption(value, "");
+ } else if (name == "TuningLevel")
+ {
+ tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
+ }
+ else if (name == "MLGOTuningFilePath")
+ {
+ m_MLGOTuningFile = ParseStringBackendOption(value, "");
+ }
+ });
+
+ // Create the tuner, in tuning mode initially.
+ m_Tuner = std::make_unique<arm_compute::CLTuner>(true);
+
+ ConfigureTuner(*(m_Tuner.get()), tuningLevel);
+
+ if (!m_TuningFile.empty())
+ {
+ try
+ {
+ ARMNN_LOG(info) << "Loading Gpu tuning data from file: " << m_TuningFile;
+ m_Tuner->load_from_file(m_TuningFile.c_str());
+ }
+ catch (const std::exception& e)
+ {
+ // Warn if not tuning, otherwise tuning will generate new params
+ if (tuningLevel == TuningLevel::None)
+ {
+ ARMNN_LOG(warning) << "Could not load GpuFsa tuner data file.";
+ }
+ }
+ }
+
+ if (!m_MLGOTuningFile.empty())
+ {
+ try
+ {
+ ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_TuningFile;
+ if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str()))
+ {
+ mlgoTuner = &m_MLGOTuner;
+ }
+ }
+ catch (const std::exception& e)
+ {
+ ARMNN_LOG(warning) << "Could not load GpuFsa MLGO tuner data file.";
+ }
+ }
+
+ tuner = m_Tuner.get();
+ }
+
+ m_GpuFsaContextControlWrapper = std::make_unique<GpuFsaContextControlWrapper>(
+ tuner,
+ mlgoTuner,
+ kernelProfiling
+ );
+}
+
+bool GpuFsaBackendContext::BeforeLoadNetwork(NetworkId)
+{
+ return true;
+}
+
+bool GpuFsaBackendContext::AfterLoadNetwork(NetworkId networkId)
+{
+ {
+ std::lock_guard<std::mutex> lockGuard(m_Mutex);
+ m_NetworkIds.insert(networkId);
+ }
+ return true;
+}
+
+bool GpuFsaBackendContext::BeforeUnloadNetwork(NetworkId)
+{
+ return m_GpuFsaContextControlWrapper->Sync();
+}
+
+bool GpuFsaBackendContext::AfterUnloadNetwork(NetworkId networkId)
+{
+ bool clearCache = false;
+ {
+ std::lock_guard<std::mutex> lockGuard(m_Mutex);
+ m_NetworkIds.erase(networkId);
+ clearCache = m_NetworkIds.empty();
+ }
+
+ if (clearCache)
+ {
+ m_GpuFsaContextControlWrapper->ClearClCache();
+ }
+
+ return true;
+}
+
+bool GpuFsaBackendContext::AfterEnqueueWorkload(NetworkId)
+{
+ return m_GpuFsaContextControlWrapper->Sync();
+}
+
+GpuFsaBackendContext::~GpuFsaBackendContext()
+{
+ if (m_Tuner && !m_TuningFile.empty())
+ {
+ try
+ {
+ m_Tuner->save_to_file(m_TuningFile.c_str());
+ }
+ catch(const std::exception& e)
+ {
+ ARMNN_LOG(warning) << "Could not save GpuFsa tuner data to file " << m_TuningFile;
+ }
+ }
+}
+
+} // namespace armnn \ No newline at end of file