From 8a570466aca7ae1619fe8fa715b68419fceb142f Mon Sep 17 00:00:00 2001
From: David Monahan <david.monahan@arm.com>
Date: Wed, 22 Nov 2023 13:24:25 +0000
Subject: IVGCVSW-8157 - Rebase existing GpuFsa patches to 23.11 Squashed
 commit of the following:

IVGCVSW-7159 Add GpuFsa backend skeleton
IVGCVSW-7380 Update the GpuFsa Skeleton to build and load ACL
IVGCVSW-7381 Add IsLayerSupported implementation to GpuFsa backend
IVGCVSW-7382 Implementation of Conv2d within GpuFsa

Signed-off-by: James Conroy <james.conroy@arm.com>
Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Signed-off-by: David Monahan <david.monahan@arm.com>
Change-Id: Id23d9ee598535de7b38a99ca223cdf0ad2102cef
---
 src/backends/gpuFsa/GpuFsaBackendContext.cpp | 233 +++++++++++++++++++++++++++
 1 file changed, 233 insertions(+)
 create mode 100644 src/backends/gpuFsa/GpuFsaBackendContext.cpp

(limited to 'src/backends/gpuFsa/GpuFsaBackendContext.cpp')

diff --git a/src/backends/gpuFsa/GpuFsaBackendContext.cpp b/src/backends/gpuFsa/GpuFsaBackendContext.cpp
new file mode 100644
index 0000000000..84b948303a
--- /dev/null
+++ b/src/backends/gpuFsa/GpuFsaBackendContext.cpp
@@ -0,0 +1,233 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "GpuFsaBackendContext.hpp"
+#include "GpuFsaContextControl.hpp"
+
+#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
+
+#include <arm_compute/core/CL/OpenCL.h>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <arm_compute/runtime/CL/CLScheduler.h>
+#include <arm_compute/runtime/CL/CLTunerTypes.h>
+
+namespace armnn
+{
+
+struct GpuFsaBackendContext::GpuFsaContextControlWrapper
+{
+    GpuFsaContextControlWrapper(arm_compute::CLTuner* tuner,
+                                arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle,
+                                bool profilingEnabled)
+        : m_GpuFsaContextControl(tuner, heuristicsHandle, profilingEnabled)
+    {}
+
+    bool Sync()
+    {
+        if (arm_compute::CLScheduler::get().context()() != NULL)
+        {
+            // Waits for all queued CL requests to finish before unloading the network they may be using.
+            try
+            {
+                // Coverity fix: arm_compute::CLScheduler::sync() may throw an exception of type cl::Error.
+                arm_compute::CLScheduler::get().sync();
+            }
+            catch (const cl::Error& err)
+            {
+                ARMNN_LOG(warning) << "Runtime::UnloadNetwork(): an error occurred while waiting for "
+                                      "the queued CL requests to finish";
+                throw err;
+            }
+        }
+
+        return true;
+    }
+
+    void ClearClCache()
+    {
+        if (arm_compute::CLScheduler::get().context()() != NULL)
+        {
+            // There are no loaded networks left, so clear the CL cache to free up memory
+            m_GpuFsaContextControl.ClearClCache();
+        }
+    }
+
+    GpuFsaContextControl m_GpuFsaContextControl;
+};
+
+GpuFsaBackendContext::GpuFsaBackendContext(const IRuntime::CreationOptions& options)
+    : IBackendContext(options)
+    , m_TuningFile()
+{
+    bool kernelProfiling = options.m_EnableGpuProfiling;
+
+    arm_compute::CLTuner* tuner = nullptr;
+    arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr;
+    bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr;
+    if (useLegacyTunerAPI)
+    {
+        auto clTunerParams = PolymorphicDowncast<ClTunedParameters*>(
+                                options.m_GpuAccTunedParameters.get());
+        tuner = &clTunerParams->m_Tuner;
+
+        if (tuner)
+        {
+            auto ConvertTuningLevel = [](IGpuAccTunedParameters::TuningLevel level,
+                                         armnn::IGpuAccTunedParameters::Mode mode)
+                {
+                    if (mode == armnn::IGpuAccTunedParameters::Mode::UseTunedParameters)
+                    {
+                        return TuningLevel::None;
+                    }
+
+                    switch(level)
+                    {
+                        case IGpuAccTunedParameters::TuningLevel::Rapid:
+                            return TuningLevel::Rapid;
+                        case IGpuAccTunedParameters::TuningLevel::Normal:
+                            return TuningLevel::Normal;
+                        case IGpuAccTunedParameters::TuningLevel::Exhaustive:
+                            return TuningLevel::Exhaustive;
+                        default:
+                        {
+                            ARMNN_LOG(warning) << "Tuning level not recognised.";
+                            return TuningLevel::None;
+                        }
+                    }
+                };
+
+            TuningLevel tuningLevel = ConvertTuningLevel(clTunerParams->m_TuningLevel, clTunerParams->m_Mode);
+            ConfigureTuner(*tuner, tuningLevel);
+        }
+    }
+    else //New backend options API
+    {
+        const TuningLevel defaultTuningLevel = TuningLevel::None;
+        auto tuningLevel = defaultTuningLevel;
+
+        ParseOptions(options.m_BackendOptions, "GpuFsa", [&](std::string name, const BackendOptions::Var& value)
+            {
+                if (name == "KernelProfilingEnabled")
+                {
+                    kernelProfiling |= ParseBooleanBackendOption(value, false);
+                } else if (name == "TuningFile")
+                {
+                    m_TuningFile = ParseStringBackendOption(value, "");
+                } else if (name == "TuningLevel")
+                {
+                    tuningLevel = ParseTuningLevel(value, defaultTuningLevel);
+                }
+                else if (name == "MLGOTuningFilePath")
+                {
+                    m_MLGOTuningFile = ParseStringBackendOption(value, "");
+                }
+            });
+
+        // Create the tuner, in tuning mode initially.
+        m_Tuner = std::make_unique<arm_compute::CLTuner>(true);
+
+        ConfigureTuner(*(m_Tuner.get()), tuningLevel);
+
+        if (!m_TuningFile.empty())
+        {
+            try
+            {
+                ARMNN_LOG(info) << "Loading Gpu tuning data from file: " << m_TuningFile;
+                m_Tuner->load_from_file(m_TuningFile.c_str());
+            }
+            catch (const std::exception& e)
+            {
+                // Warn if not tuning, otherwise tuning will generate new params
+                if (tuningLevel == TuningLevel::None)
+                {
+                    ARMNN_LOG(warning) << "Could not load GpuFsa tuner data file.";
+                }
+            }
+        }
+
+        if (!m_MLGOTuningFile.empty())
+        {
+            try
+            {
+                ARMNN_LOG(info) << "Loading Gpu MLGO tuning data from file: " << m_TuningFile;
+                if(m_MLGOTuner.reload_from_file(m_MLGOTuningFile.c_str()))
+                {
+                    mlgoTuner = &m_MLGOTuner;
+                }
+            }
+            catch (const std::exception& e)
+            {
+                ARMNN_LOG(warning) << "Could not load GpuFsa MLGO tuner data file.";
+            }
+        }
+
+        tuner = m_Tuner.get();
+    }
+
+    m_GpuFsaContextControlWrapper = std::make_unique<GpuFsaContextControlWrapper>(
+            tuner,
+            mlgoTuner,
+            kernelProfiling
+    );
+}
+
+bool GpuFsaBackendContext::BeforeLoadNetwork(NetworkId)
+{
+    return true;
+}
+
+bool GpuFsaBackendContext::AfterLoadNetwork(NetworkId networkId)
+{
+    {
+        std::lock_guard<std::mutex> lockGuard(m_Mutex);
+        m_NetworkIds.insert(networkId);
+    }
+    return true;
+}
+
+bool GpuFsaBackendContext::BeforeUnloadNetwork(NetworkId)
+{
+    return m_GpuFsaContextControlWrapper->Sync();
+}
+
+bool GpuFsaBackendContext::AfterUnloadNetwork(NetworkId networkId)
+{
+    bool clearCache = false;
+    {
+        std::lock_guard<std::mutex> lockGuard(m_Mutex);
+        m_NetworkIds.erase(networkId);
+        clearCache = m_NetworkIds.empty();
+    }
+
+    if (clearCache)
+    {
+        m_GpuFsaContextControlWrapper->ClearClCache();
+    }
+
+    return true;
+}
+
+bool GpuFsaBackendContext::AfterEnqueueWorkload(NetworkId)
+{
+    return m_GpuFsaContextControlWrapper->Sync();
+}
+
+GpuFsaBackendContext::~GpuFsaBackendContext()
+{
+    if (m_Tuner && !m_TuningFile.empty())
+    {
+        try
+        {
+            m_Tuner->save_to_file(m_TuningFile.c_str());
+        }
+        catch(const std::exception& e)
+        {
+            ARMNN_LOG(warning) << "Could not save GpuFsa tuner data to file " << m_TuningFile;
+        }
+    }
+}
+
+} // namespace armnn
\ No newline at end of file
-- 
cgit v1.2.1