diff options
author | SiCong Li <sicong.li@arm.com> | 2023-06-28 09:49:45 +0100 |
---|---|---|
committer | SiCong Li <sicong.li@arm.com> | 2023-07-25 15:48:50 +0000 |
commit | 23882a9014eb3972bca958206866c8e0d0b829cc (patch) | |
tree | 9139b91699099160e26a64abd8cf182bd7447278 /src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | |
parent | 0a59e69fd922b02d9e3b5b043ee7f891061df7be (diff) | |
download | ComputeLibrary-23882a9014eb3972bca958206866c8e0d0b829cc.tar.gz |
Add GpuKernelArgumentBinding for runtime argument setting
* Add flexible runtime argument setting that accepts argument bindings
exported from ckw.
* Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to
true, ckw will be tested in dynamic fusion validation tests. Otherwise,
it will not be tested, and dynamic fusion will keep using
ClTemplateWriter instead.
* Fix CKW sampler for elementwise binary to deal with tile sizes > 1
in both dimensions
Resolves: COMPMID-6282
Partially resolves: COMPMID-6260
Signed-off-by: SiCong Li <sicong.li@arm.com>
Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Anitha Raj <Anitha.Raj@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 43 |
1 file changed, 27 insertions, 16 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp index 50f34d9c14..c2bd012703 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp @@ -32,9 +32,8 @@ namespace experimental { namespace dynamic_fusion { - GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx) - : _impl { std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) } + : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) } { } @@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const } GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx) - : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), - _next_tensor_id(1), _mem_map() + : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info() { } @@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info) tensor_info.set_id(tensor_id); _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User }; + // Save a *copy* of the user tensor info in workload context for future reference + // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context + _managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info)); } -void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info) +ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor() { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); - - const auto tensor_id = next_tensor_id(); - - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info }; + auto tensor_info = std::make_unique<TensorInfo>(); + const auto tensor_id = -next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); } -void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info) +ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info) { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); + auto tensor_info = std::make_unique<TensorInfo>(itensor_info); + const auto tensor_id = next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); +} - const auto tensor_id = -next_tensor_id(); +ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) +{ + return _managed_tensor_info.at(id).get(); +} - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; +const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const +{ + return _managed_tensor_info.at(id).get(); } ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id() |