diff options
author | SiCong Li <sicong.li@arm.com> | 2023-06-28 09:49:45 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2023-07-25 15:48:50 +0000 |
commit | 23882a9014eb3972bca958206866c8e0d0b829cc (patch) | |
tree | 9139b91699099160e26a64abd8cf182bd7447278 /src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | |
parent | 0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff) | |
download | ComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz |
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accepts argument bindings
exported from ckw.
* Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to
true, ckw will be tested in dynamic fusion validation tests. Otherwise,
it will not be tested, and dynamic fusion will keep using
ClTemplateWriter instead.
* Fix CKW sampler for elementwise binary to deal with tile sizes > 1
in both dimensions
Resolves: COMPMID-6282
Partially resolves: COMPMID-6260
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Anitha Raj <Anitha.Raj@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 43 |
1 file changed, 27 insertions, 16 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp index 50f34d9c14..c2bd012703 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp @@ -32,9 +32,8 @@ namespace experimental { namespace dynamic_fusion { - GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx) - : _impl { std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) } + : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) } { } @@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const } GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx) - : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), - _next_tensor_id(1), _mem_map() + : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info() { } @@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info) tensor_info.set_id(tensor_id); _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User }; + // Save a *copy* of the user tensor info in workload context for future reference + // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context + _managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info)); } -void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info) +ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor() { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); - - const auto tensor_id = next_tensor_id(); - - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info }; + auto tensor_info = std::make_unique<TensorInfo>(); + const auto tensor_id = -next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); } -void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info) +ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info) { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); + auto tensor_info = std::make_unique<TensorInfo>(itensor_info); + const auto tensor_id = next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); +} - const auto tensor_id = -next_tensor_id(); +ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) +{ + return _managed_tensor_info.at(id).get(); +} - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; +const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const +{ + return _managed_tensor_info.at(id).get(); } ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id() |