diff options
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackendContext.cpp')
-rw-r--r-- | src/backends/gpuFsa/GpuFsaBackendContext.cpp | 230 |
1 files changed, 230 insertions, 0 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackendContext.cpp b/src/backends/gpuFsa/GpuFsaBackendContext.cpp new file mode 100644 index 0000000000..72b77e0d19 --- /dev/null +++ b/src/backends/gpuFsa/GpuFsaBackendContext.cpp @@ -0,0 +1,230 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "GpuFsaBackendContext.hpp" +#include "GpuFsaContextControl.hpp" + +#include <armnn/utility/Assert.hpp> +#include <armnn/utility/PolymorphicDowncast.hpp> + +#include <arm_compute/runtime/CL/CLScheduler.h> + +namespace armnn +{ + +struct GpuFsaBackendContext::GpuFsaContextControlWrapper +{ + GpuFsaContextControlWrapper(arm_compute::CLTuner* tuner, + arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, + bool profilingEnabled) + : m_GpuFsaContextControl(tuner, heuristicsHandle, profilingEnabled) + {} + + bool Sync() + { + if (arm_compute::CLScheduler::get().context()() != NULL) + { + // Waits for all queued CL requests to finish before unloading the network they may be using. + try + { + // Coverity fix: arm_compute::CLScheduler::sync() may throw an exception of type cl::Error. + arm_compute::CLScheduler::get().sync(); + } + catch (const cl::Error&) + { + ARMNN_LOG(warning) << "Runtime::UnloadNetwork(): an error occurred while waiting for " + "the queued CL requests to finish"; + return false; + } + } + + return true; + } + + void ClearClCache() + { + if (arm_compute::CLScheduler::get().context()() != NULL) + { + // There are no loaded networks left, so clear the CL cache to free up memory + m_GpuFsaContextControl.ClearClCache(); + } + } + + GpuFsaContextControl m_GpuFsaContextControl; +}; + +GpuFsaBackendContext::GpuFsaBackendContext(const IRuntime::CreationOptions& options) + : IBackendContext(options) + , m_TuningFile() +{ + bool kernelProfiling = options.m_EnableGpuProfiling; + + arm_compute::CLTuner* tuner = nullptr; + arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr; + bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr; + if (useLegacyTunerAPI) + { + auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>( + options.m_GpuAccTunedParameters.get()); + tuner = &clTunerParams->m_Tuner; + + if (tuner) + { + auto ConvertTuningLevel = [](IGpuAccTunedParameters::TuningLevel level, + armnn::IGpuAccTunedParameters::Mode mode) + { + if (mode == armnn::IGpuAccTunedParameters::Mode::UseTunedParameters) + { + return TuningLevel::None; + } + + switch(level) + { + case IGpuAccTunedParameters::TuningLevel::Rapid: + return TuningLevel::Rapid; + case IGpuAccTunedParameters::TuningLevel::Normal: + return TuningLevel::Normal; + case IGpuAccTunedParameters::TuningLevel::Exhaustive: + return TuningLevel::Exhaustive; + default: + { + ARMNN_ASSERT_MSG(false, "Tuning level not recognised."); + return TuningLevel::None; + } + } + }; + + TuningLevel tuningLevel = ConvertTuningLevel(clTunerParams->m_TuningLevel, clTunerParams->m_Mode); + ConfigureTuner(*tuner, tuningLevel); + } + } + else //New backend options API + { + const TuningLevel defaultTuningLevel = TuningLevel::None; + auto tuningLevel = defaultTuningLevel; + + ParseOptions(options.m_BackendOptions, "GpuFsa", [&](std::string name, const BackendOptions::Var& value) + { + if (name == "KernelProfilingEnabled") + { + kernelProfiling |= ParseBooleanBackendOption(value, false); + } else if (name == "TuningFile") + { + m_TuningFile = ParseStringBackendOption(value, ""); + } else if (name == "TuningLevel") + { + tuningLevel = ParseTuningLevel(value, defaultTuningLevel); + } + else if (name == "MLGOTuningFilePath") + { + m_MLGOTuningFile = ParseStringBackendOption(value, ""); + } + }); + + // Create the tuner, in tuning mode initially. + m_Tuner = std::make_unique<arm_compute::CLTuner>(true); + + ConfigureTuner(*(m_Tuner.get()), tuningLevel); + + if (!m_TuningFile.empty()) + { + try + { + ARMNN_LOG(info) << "Loading Gpu tuning data from file: " << m_TuningFile; + m_Tuner->load_from_file(m_TuningFile.c_str()); + } + catch (const std::exception& e) + { + // Warn if not tuning, otherwise tuning will generate new params + if (tuningLevel == TuningLevel::None) + { + ARMNN_LOG(warning) << "Could not load GpuFsa tuner data file."; + } + } + } + + if (!m_MLGOTuningFile.empty()) + { + try + { + ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_TuningFile; + if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str())) + { + mlgoTuner = &m_MLGOTuner; + } + } + catch (const std::exception& e) + { + ARMNN_LOG(warning) << "Could not load GpuFsa MLGO tuner data file."; + } + } + + tuner = m_Tuner.get(); + } + + m_GpuFsaContextControlWrapper = std::make_unique<GpuFsaContextControlWrapper>( + tuner, + mlgoTuner, + kernelProfiling + ); +} + +bool GpuFsaBackendContext::BeforeLoadNetwork(NetworkId) +{ + return true; +} + +bool GpuFsaBackendContext::AfterLoadNetwork(NetworkId networkId) +{ + { + std::lock_guard<std::mutex> lockGuard(m_Mutex); + m_NetworkIds.insert(networkId); + } + return true; +} + +bool GpuFsaBackendContext::BeforeUnloadNetwork(NetworkId) +{ + return m_GpuFsaContextControlWrapper->Sync(); +} + +bool GpuFsaBackendContext::AfterUnloadNetwork(NetworkId networkId) +{ + bool clearCache = false; + { + std::lock_guard<std::mutex> lockGuard(m_Mutex); + m_NetworkIds.erase(networkId); + clearCache = m_NetworkIds.empty(); + } + + if (clearCache) + { + m_GpuFsaContextControlWrapper->ClearClCache(); + } + + return true; +} + +bool GpuFsaBackendContext::AfterEnqueueWorkload(NetworkId) +{ + return m_GpuFsaContextControlWrapper->Sync(); +} + +GpuFsaBackendContext::~GpuFsaBackendContext() +{ + if (m_Tuner && !m_TuningFile.empty()) + { + try + { + m_Tuner->save_to_file(m_TuningFile.c_str()); + } + catch(const std::exception& e) + { + ARMNN_LOG(warning) << "Could not save GpuFsa tuner data to file " << m_TuningFile; + } + } +} + +} // namespace armnn
\ No newline at end of file |