path: root/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
author     SiCong Li <sicong.li@arm.com>  2023-06-28 09:49:45 +0100
committer  SiCong Li <sicong.li@arm.com>  2023-07-25 15:48:50 +0000
commit     23882a9014eb3972bca958206866c8e0d0b829cc (patch)
tree       9139b91699099160e26a64abd8cf182bd7447278  /src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
parent     0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff)
download   ComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accepts argument bindings exported from ckw.
* Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in the dynamic fusion validation tests; otherwise it will not be tested and dynamic fusion will keep using ClTemplateWriter instead.
* Fix the CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions.

Resolves: COMPMID-6282
Partially resolves: COMPMID-6260

Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Anitha Raj <Anitha.Raj@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
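For readers unfamiliar with how an internal build flag such as ACL_INTERNAL_TEST_CKW_IN_DF is typically consumed, the following is a minimal compile-time gating sketch. The types IKernelWriterDriver, CkwDriver and TemplateDriver are hypothetical placeholders used only for illustration; they are not ComputeLibrary classes, and the real selection logic in the library may differ.

// Hypothetical sketch of compile-time gating on an internal build flag.
// None of these type names exist in ComputeLibrary.
#include <memory>

struct IKernelWriterDriver
{
    virtual ~IKernelWriterDriver() = default;
    virtual void write() = 0;
};

struct CkwDriver final : IKernelWriterDriver
{
    void write() override { /* would route code generation through ckw */ }
};

struct TemplateDriver final : IKernelWriterDriver
{
    void write() override { /* would route code generation through the template-based writer */ }
};

std::unique_ptr<IKernelWriterDriver> make_writer_driver()
{
#if defined(ACL_INTERNAL_TEST_CKW_IN_DF)
    return std::make_unique<CkwDriver>();      // flag set: exercise the ckw path in validation tests
#else
    return std::make_unique<TemplateDriver>(); // flag unset: keep using the template-based path
#endif
}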
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp')
-rw-r--r--  src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp  43
1 file changed, 27 insertions, 16 deletions
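The diff below moves GpuWorkloadContext::Impl from registering caller-owned tensor infos to owning copies of them in an id-keyed map, handing out raw pointers into that map, and giving virtual tensors negative ids. Before the diff, here is a minimal standalone sketch of that ownership pattern; the types are deliberately simplified placeholders, not the actual ACL classes, and the memory-descriptor bookkeeping of the real implementation is only noted in comments.

// Standalone sketch of the ownership pattern introduced in this patch:
// the context keeps its own copies of tensor metadata in an id-keyed map
// and returns raw pointers into that map. Simplified placeholder types.
#include <cstddef>
#include <cstdint>
#include <map>
#include <memory>

struct TensorInfoSketch
{
    int32_t     id{0};
    std::size_t total_size{0};
};

class WorkloadContextSketch
{
public:
    // Virtual (intermediate) tensors get negative ids, mirroring the diff.
    TensorInfoSketch *create_virtual_tensor()
    {
        auto info = std::make_unique<TensorInfoSketch>();
        info->id  = -next_id();
        // The real implementation also records a MemoryDescriptor{ MemoryType::Virtual } here.
        return store(std::move(info));
    }

    // Auxiliary tensors copy the caller's info and get a positive id.
    TensorInfoSketch *create_auxiliary_tensor(const TensorInfoSketch &src)
    {
        auto info = std::make_unique<TensorInfoSketch>(src); // copy; the context owns it
        info->id  = next_id();
        // The real implementation also records MemoryType::Auxiliary with the tensor's total size.
        return store(std::move(info));
    }

    TensorInfoSketch *get_tensor_info(int32_t id)
    {
        return _managed.at(id).get();
    }

private:
    TensorInfoSketch *store(std::unique_ptr<TensorInfoSketch> info)
    {
        auto inserted = _managed.emplace(info->id, std::move(info));
        return inserted.first->second.get();
    }

    int32_t next_id()
    {
        return _next_id++;
    }

    int32_t _next_id{1};
    std::map<int32_t, std::unique_ptr<TensorInfoSketch>> _managed;
};

Returning a pointer into the owning map ties the lifetime of every tensor info to the context itself, which is what allows the hunks below to replace the caller-owned register_aux_tensor/register_virtual_tensor overloads with create_auxiliary_tensor/create_virtual_tensor.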
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index 50f34d9c14..c2bd012703 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -32,9 +32,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-
GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
- : _impl { std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
+ : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
{
}
@@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
}
GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
- : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx),
- _next_tensor_id(1), _mem_map()
+ : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info()
{
}
@@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
tensor_info.set_id(tensor_id);
_mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+ // Save a *copy* of the user tensor info in workload context for future reference
+ // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context
+ _managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info));
}
-void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor()
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
-
- const auto tensor_id = next_tensor_id();
-
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info };
+ auto tensor_info = std::make_unique<TensorInfo>();
+ const auto tensor_id = -next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
}
-void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info)
+ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info)
{
- ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+ auto tensor_info = std::make_unique<TensorInfo>(itensor_info);
+ const auto tensor_id = next_tensor_id();
+ tensor_info->set_id(tensor_id);
+ _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } };
+ auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
+ return inserted.first->second.get();
+}
- const auto tensor_id = -next_tensor_id();
+ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id)
+{
+ return _managed_tensor_info.at(id).get();
+}
- tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const
+{
+ return _managed_tensor_info.at(id).get();
}
ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id()