From 3fcf3dcf7b6ffc613468ccaca580bde495677440 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Wed, 17 May 2023 15:17:48 +0100
Subject: Add multi-sketch support for dynamic fusion

* Tensors are owned by workload context instead of workload sketch
  so that they can be used by multiple sketches.
* Add an integration test for multi-sketch case.

Resolves: COMPMID-6148
Signed-off-by: Viet-Hoa Do
Change-Id: I37d0de5ac103fb2a85020aa1c26e49eb304f47b7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9706
Comments-Addressed: Arm Jenkins
Reviewed-by: SiCong Li
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../sketch/gpu/GpuWorkloadContext.cpp       | 88 ++++++++++++++++++-
 .../sketch/gpu/GpuWorkloadContextImpl.h     | 99 ++++++++++++++++++++++
 .../sketch/gpu/GpuWorkloadSketch.cpp        | 14 ---
 .../sketch/gpu/GpuWorkloadSketchImpl.h      | 23 ++---
 .../sketch/gpu/operators/GpuSoftmax.cpp     |  4 +-
 5 files changed, 191 insertions(+), 37 deletions(-)
 create mode 100644 src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h

diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index 623bf351f8..50f34d9c14 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
 #include "arm_compute/core/CL/CLCompileContext.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
 
 namespace arm_compute
 {
@@ -30,26 +32,104 @@ namespace experimental
 {
 namespace dynamic_fusion
 {
+
 GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
-    : _gpu_language{ GpuLanguage::OpenCL }, _cl_compile_ctx{ cl_compile_ctx }
+    : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
 {
 }
 
+GpuWorkloadContext::~GpuWorkloadContext() = default;
+
+GpuWorkloadContext::GpuWorkloadContext(GpuWorkloadContext &&other) = default;
+
+GpuWorkloadContext &GpuWorkloadContext::operator=(GpuWorkloadContext &&other) = default;
+
 GpuTarget GpuWorkloadContext::gpu_target() const
 {
-    return _cl_compile_ctx->get_gpu_target();
+    return _impl->cl_compile_context()->get_gpu_target();
 }
 
 GpuLanguage GpuWorkloadContext::gpu_language() const
 {
-    return _gpu_language;
+    return _impl->gpu_language();
 }
 
 const CLCompileContext *GpuWorkloadContext::cl_compile_context() const
+{
+    return _impl->cl_compile_context();
+}
+
+void GpuWorkloadContext::register_user_tensor(ITensorInfo &tensor_info)
+{
+    _impl->register_user_tensor(tensor_info);
+}
+
+GpuWorkloadContext::Impl &GpuWorkloadContext::implementation()
+{
+    return *_impl;
+}
+
+const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
+{
+    return *_impl;
+}
+
+GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
+    : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx),
+      _next_tensor_id(1), _mem_map()
+{
+}
+
+GpuLanguage GpuWorkloadContext::Impl::gpu_language() const
+{
+    return _gpu_language;
+}
+
+const CLCompileContext *GpuWorkloadContext::Impl::cl_compile_context() const
 {
     return _cl_compile_ctx;
 }
 
+const MemoryDescriptorMap &GpuWorkloadContext::Impl::mem_map() const
+{
+    return _mem_map;
+}
+
+void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+}
+
+void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info };
+}
+
+void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = -next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+}
+
+ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id()
+{
+    return _next_tensor_id++;
+}
+
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
new file mode 100644
index 0000000000..a857932791
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** Internal implementation of workload context. */
+class GpuWorkloadContext::Impl
+{
+public:
+    /** Constructor
+     *
+     * @param[in] gpu_language   Target GPU language.
+     * @param[in] cl_compile_ctx CL compile context.
+     */
+    Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx);
+
+    /** Copy constructor */
+    Impl(Impl &) = default;
+
+    /** Assignment */
+    Impl &operator=(Impl &) = default;
+
+    /** Get target GPU language. */
+    GpuLanguage gpu_language() const;
+
+    /** Get CL compile context. */
+    const CLCompileContext *cl_compile_context() const;
+
+    /** Get memory descriptor registry. */
+    const MemoryDescriptorMap &mem_map() const;
+
+    /** Set a new ID and register the user tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     */
+    void register_user_tensor(ITensorInfo &tensor_info);
+
+    /** Set a new ID and register the auxiliary tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     * @param[in]      mem_info    The auxiliary tensor memory info.
+     */
+    void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info);
+
+    /** Set a new ID and register the virtual tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     */
+    void register_virtual_tensor(ITensorInfo &tensor_info);
+
+private:
+    ITensorInfo::Id next_tensor_id();
+
+    GpuLanguage _gpu_language;
+    CLCompileContext *_cl_compile_ctx;
+
+    ITensorInfo::Id _next_tensor_id;
+    MemoryDescriptorMap _mem_map;
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index 33f672071d..d3a20c0dfe 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -43,20 +43,6 @@ const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
     return _impl->context();
 }
 
-void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info)
-{
-    tensor_info.set_id(_impl->allocate_new_tensor_id());
-    // All input output tensors are User tensors that need real backing memory
-    _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User });
-}
-
-TensorInfo GpuWorkloadSketch::create_tensor_info()
-{
-    TensorInfo tensor_info{};
-    register_new_tensor(tensor_info);
-    return tensor_info;
-}
-
 GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
 {
     return *_impl;
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index d5075d5c94..44c99e844b 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
@@ -53,8 +54,7 @@ public:
           _comp_services{},
           _component_graph{ &_comp_services },
           _operator_group{},
-          _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() },
-          _mem_map{}
+          _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
     {
     }
     /** Prevent instances of this class from being copy constructed */
@@ -101,7 +101,8 @@ public:
      */
     GpuWorkloadSourceCode generate_source_code() const
     {
-        return component_graph().fuse(_mem_map).write_workload_code();
+        const auto mem_map = _context->implementation().mem_map();
+        return component_graph().fuse(mem_map).write_workload_code();
     }
     /** Create a virtual (see @ref MemoryType) tensor info and save it
     *
@@ -110,8 +111,7 @@ public:
    ITensorInfo *create_virtual_tensor()
    {
        auto uptr = std::make_unique<TensorInfo>();
-        uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id
-        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual });
+        _context->implementation().register_virtual_tensor(*uptr);
         _managed_tensor_info_list.emplace_back(std::move(uptr));
         return _managed_tensor_info_list.back().get();
     }
@@ -129,20 +129,10 @@ public:
     ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
     {
         auto uptr = std::make_unique<TensorInfo>(tensor_info);
-        uptr->set_id(allocate_new_tensor_id());
-        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } });
+        _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() });
         _managed_tensor_info_list.emplace_back(std::move(uptr));
         return _managed_tensor_info_list.back().get();
     }
-    /** Register memory descriptor of a tensor info
-     *
-     * @param[in] info     @ref ITensorInfo to be registered
-     * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info
-     */
-    void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc)
-    {
-        _mem_map[info.id()] = mem_desc;
-    }
 
 private:
     Context *_context;
@@ -151,7 +141,6 @@ private:
     GpuComponentServices _comp_services;
     GpuKernelComponentGraph _component_graph;
     GpuOperatorGroup _operator_group;
     ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
     std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
-    MemoryDescriptorMap _mem_map;
 };
 } // namespace dynamic_fusion
 } // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index 291a1e5bda..ffc4553a7d 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -136,9 +136,9 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
     ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
     TensorShape logits_sum_shape = src->tensor_shape();
-    ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_tensor_shape(logits_sum_shape));
+    ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
     logits_sum_shape.set(0, 1);
-    ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_tensor_shape(logits_sum_shape));
+    ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
 
     // Auto initialize dst tensor info and the auxiliary tensor infos as well
     auto_init_if_empty(*dst, *src->clone());
--
cgit v1.2.1
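
Usage note: the practical effect of this patch is that one GpuWorkloadContext can back several GpuWorkloadSketch objects, because tensor IDs and memory descriptors now live in the context's mem_map rather than in any single sketch. The snippet below is a minimal sketch of that flow. It assumes the register_user_tensor API added by this patch; the GpuAdd/GpuOutput operators, the TensorInfo constructor arguments, and the function name multi_sketch_example come from the wider library and are illustrative assumptions, not part of this patch.

```cpp
// Minimal multi-sketch sketch. register_user_tensor is from this patch;
// GpuAdd/GpuOutput signatures are assumed from the wider ComputeLibrary API.
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

void multi_sketch_example(CLCompileContext *cl_compile_ctx)
{
    // One context owns every tensor ID and memory descriptor.
    GpuWorkloadContext context{ cl_compile_ctx };

    TensorInfo in_0{ TensorShape(32U, 32U), 1, DataType::F32 };
    TensorInfo in_1{ TensorShape(32U, 32U), 1, DataType::F32 };
    TensorInfo out_0{}; // auto-initialized by the operators
    TensorInfo out_1{};

    // IDs are assigned once by the context (positive for user tensors),
    // so the same tensor infos stay valid across sketches.
    context.register_user_tensor(in_0);
    context.register_user_tensor(in_1);
    context.register_user_tensor(out_0);
    context.register_user_tensor(out_1);

    // First sketch: fuse an addition and materialize it into out_0.
    GpuWorkloadSketch sketch_0{ &context };
    ITensorInfo *add_0 = GpuAdd::create_op(sketch_0, &in_0, &in_1);
    GpuOutput::create_op(sketch_0, add_0, &out_0);

    // Second sketch on the same context: consumes out_0, which keeps the
    // ID the context gave it, with no re-registration needed.
    GpuWorkloadSketch sketch_1{ &context };
    ITensorInfo *add_1 = GpuAdd::create_op(sketch_1, &out_0, &in_1);
    GpuOutput::create_op(sketch_1, add_1, &out_1);
}
```

Auxiliary and virtual tensors follow the same scheme through register_aux_tensor and register_virtual_tensor (virtual tensors get negated IDs), so generate_source_code() on any sketch can pull the complete MemoryDescriptorMap from the shared context.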