1 files changed, 163 insertions, 0 deletions
diff --git a/src/backends/gpuFsa/GpuFsaContextControl.cpp b/src/backends/gpuFsa/GpuFsaContextControl.cpp
new file mode 100644
index 0000000000..795de5e14d
--- /dev/null
+++ b/src/backends/gpuFsa/GpuFsaContextControl.cpp
@@ -0,0 +1,163 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GpuFsaContextControl.hpp"
+
+#include <armnn/Exceptions.hpp>
+#include <armnn/utility/Assert.hpp>
+#include <LeakChecking.hpp>
+
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+
+#include <fmt/format.h>
+
+namespace cl
+{
+class Context;
+class CommandQueue;
+class Device;
+}
+
+namespace armnn
+{
+
+GpuFsaContextControl::GpuFsaContextControl(arm_compute::CLTuner *tuner,
+                                           arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
+                                           bool profilingEnabled)
+    : m_Tuner(tuner)
+    , m_HeuristicsHandle(heuristicsHandle)
+    , m_ProfilingEnabled(profilingEnabled)
+{
+    try
+    {
+        std::vector<cl::Platform> platforms;
+        cl::Platform::get(&platforms);
+
+        // Selects default platform for the first element.
+        cl::Platform::setDefault(platforms[0]);
+
+        std::vector<cl::Device> devices;
+        platforms[0].getDevices(CL_DEVICE_TYPE_GPU, &devices);
+
+        // Selects default device for the first element.
+        cl::Device::setDefault(devices[0]);
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(fmt::format(
+            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
+            clError.what(), clError.err()));
+    }
+
+    // Removes the use of global CL context.
+    cl::Context::setDefault(cl::Context{});
+    ARMNN_ASSERT(cl::Context::getDefault()() == NULL);
+
+    // Removes the use of global CL command queue.
+    cl::CommandQueue::setDefault(cl::CommandQueue{});
+    ARMNN_ASSERT(cl::CommandQueue::getDefault()() == NULL);
+
+    // Always load the OpenCL runtime.
+    LoadOpenClRuntime();
+}
+
+GpuFsaContextControl::~GpuFsaContextControl()
+{
+    // Load the OpencCL runtime without the tuned parameters to free the memory for them.
+    try
+    {
+        UnloadOpenClRuntime();
+    }
+    catch (const cl::Error& clError)
+    {
+        // This should not happen, it is ignored if it does.
+
+        // Coverity fix: BOOST_LOG_TRIVIAL (previously used here to report the error) may throw an
+        // exception of type std::length_error.
+        // Using stderr instead in this context as there is no point in nesting try-catch blocks here.
+        std::cerr << "A CL error occurred unloading the runtime tuner parameters: "
+                  << clError.what() << ". CL error code is: " << clError.err() << std::endl;
+    }
+}
+
+void GpuFsaContextControl::LoadOpenClRuntime()
+{
+    DoLoadOpenClRuntime(true);
+}
+
+void GpuFsaContextControl::UnloadOpenClRuntime()
+{
+    DoLoadOpenClRuntime(false);
+}
+
+void GpuFsaContextControl::DoLoadOpenClRuntime(bool updateTunedParameters)
+{
+    cl::Device device = cl::Device::getDefault();
+    cl::Context context;
+    cl::CommandQueue commandQueue;
+
+    if (arm_compute::CLScheduler::get().is_initialised() && arm_compute::CLScheduler::get().context()() != NULL)
+    {
+        // Wait for all queued CL requests to finish before reinitialising it.
+        arm_compute::CLScheduler::get().sync();
+    }
+
+    try
+    {
+        arm_compute::CLKernelLibrary::get().clear_programs_cache();
+        // Initialise the scheduler with a dummy context to release the LLVM data (which only happens when there are no
+        // context references); it is initialised again, with a proper context, later.
+        arm_compute::CLScheduler::get().init(context, commandQueue, device);
+        arm_compute::CLKernelLibrary::get().init(".", context, device);
+
+        {
+            //
+            // Here we replace the context with a new one in which
+            // the memory leak checks show it as an extra allocation but
+            // because of the scope of the leak checks, it doesn't count
+            // the disposal of the original object. On the other hand it
+            // does count the creation of this context which it flags
+            // as a memory leak. By adding the following line we prevent
+            // this to happen.
+            //
+            ARMNN_DISABLE_LEAK_CHECKING_IN_SCOPE();
+            context = cl::Context(device);
+        }
+
+        // NOTE: In this specific case profiling has to be enabled on the command queue
+        // in order for the CLTuner to work.
+        bool profilingNeededForClTuner = updateTunedParameters && m_Tuner &&
+            m_Tuner->tune_new_kernels();
+
+        if (m_ProfilingEnabled || profilingNeededForClTuner)
+        {
+            // Create a new queue with profiling enabled.
+            commandQueue = cl::CommandQueue(context, device, CL_QUEUE_PROFILING_ENABLE);
+        }
+        else
+        {
+            // Use default queue.
+            commandQueue = cl::CommandQueue(context, device);
+        }
+    }
+    catch (const cl::Error& clError)
+    {
+        throw ClRuntimeUnavailableException(fmt::format(
+            "Could not initialize the CL runtime. Error description: {0}. CL error code: {1}",
+            clError.what(), clError.err()));
+    }
+
+    // Note the first argument (path to cl source code) will be ignored as they should be embedded in the armcompute.
+    arm_compute::CLKernelLibrary::get().init(".", context, device);
+    arm_compute::CLScheduler::get().init(context, commandQueue, device, m_Tuner, m_HeuristicsHandle);
+}
+
+void GpuFsaContextControl::ClearClCache()
+{
+    DoLoadOpenClRuntime(true);
+}
+
+} // namespace armnn