From 3fcf3dcf7b6ffc613468ccaca580bde495677440 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Wed, 17 May 2023 15:17:48 +0100
Subject: Add multi-sketch support for dynamic fusion

* Tensors are owned by workload context instead of workload sketch
  so that they can be used by multiple sketches.
* Add an integration test for multi-sketch case.

Resolves: COMPMID-6148
Signed-off-by: Viet-Hoa Do
Change-Id: I37d0de5ac103fb2a85020aa1c26e49eb304f47b7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9706
Comments-Addressed: Arm Jenkins
Reviewed-by: SiCong Li
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 .../sketch/gpu/GpuWorkloadContext.cpp       | 88 ++++++++++++++++++-
 .../sketch/gpu/GpuWorkloadContextImpl.h     | 99 ++++++++++++++++++++++
 .../sketch/gpu/GpuWorkloadSketch.cpp        | 14 ---
 .../sketch/gpu/GpuWorkloadSketchImpl.h      | 23 ++---
 .../sketch/gpu/operators/GpuSoftmax.cpp     |  4 +-
 5 files changed, 191 insertions(+), 37 deletions(-)
 create mode 100644 src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h

diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index 623bf351f8..50f34d9c14 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
 #include "arm_compute/core/CL/CLCompileContext.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
 
 namespace arm_compute
 {
@@ -30,26 +32,104 @@ namespace experimental
 {
 namespace dynamic_fusion
 {
+
 GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
-    : _gpu_language{ GpuLanguage::OpenCL }, _cl_compile_ctx{ cl_compile_ctx }
+    : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
 {
 }
 
+GpuWorkloadContext::~GpuWorkloadContext() = default;
+
+GpuWorkloadContext::GpuWorkloadContext(GpuWorkloadContext &&other) = default;
+
+GpuWorkloadContext &GpuWorkloadContext::operator=(GpuWorkloadContext &&other) = default;
+
 GpuTarget GpuWorkloadContext::gpu_target() const
 {
-    return _cl_compile_ctx->get_gpu_target();
+    return _impl->cl_compile_context()->get_gpu_target();
 }
 
 GpuLanguage GpuWorkloadContext::gpu_language() const
 {
-    return _gpu_language;
+    return _impl->gpu_language();
 }
 
 const CLCompileContext *GpuWorkloadContext::cl_compile_context() const
+{
+    return _impl->cl_compile_context();
+}
+
+void GpuWorkloadContext::register_user_tensor(ITensorInfo &tensor_info)
+{
+    _impl->register_user_tensor(tensor_info);
+}
+
+GpuWorkloadContext::Impl &GpuWorkloadContext::implementation()
+{
+    return *_impl;
+}
+
+const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
+{
+    return *_impl;
+}
+
+GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
+    : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx),
+      _next_tensor_id(1), _mem_map()
+{
+}
+
+GpuLanguage GpuWorkloadContext::Impl::gpu_language() const
+{
+    return _gpu_language;
+}
+
+const CLCompileContext *GpuWorkloadContext::Impl::cl_compile_context() const
 {
     return _cl_compile_ctx;
 }
 
+const MemoryDescriptorMap &GpuWorkloadContext::Impl::mem_map() const
+{
+    return _mem_map;
+}
+
+void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+}
+
+void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info };
+}
+
+void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info)
+{
+    ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id());
+
+    const auto tensor_id = -next_tensor_id();
+
+    tensor_info.set_id(tensor_id);
+    _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+}
+
+ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id()
+{
+    return _next_tensor_id++;
+}
+
 } // namespace dynamic_fusion
 } // namespace experimental
 } // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
new file mode 100644
index 0000000000..a857932791
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
+
+#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** Internal implementation of workload context. */
+class GpuWorkloadContext::Impl
+{
+public:
+    /** Constructor
+     *
+     * @param[in] gpu_language   Target GPU language.
+     * @param[in] cl_compile_ctx CL compile context.
+     */
+    Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx);
+
+    /** Copy constructor */
+    Impl(Impl &) = default;
+
+    /** Assignment */
+    Impl &operator=(Impl &) = default;
+
+    /** Get target GPU language. */
+    GpuLanguage gpu_language() const;
+
+    /** Get CL compile context. */
+    const CLCompileContext *cl_compile_context() const;
+
+    /** Get memory descriptor registry. */
+    const MemoryDescriptorMap &mem_map() const;
+
+    /** Set a new ID and register the user tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     */
+    void register_user_tensor(ITensorInfo &tensor_info);
+
+    /** Set a new ID and register the auxiliary tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     * @param[in]      mem_info    The auxiliary tensor memory info.
+     */
+    void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info);
+
+    /** Set a new ID and register the virtual tensor info.
+     *
+     * @param[in, out] tensor_info The tensor info to be registered.
+     */
+    void register_virtual_tensor(ITensorInfo &tensor_info);
+
+private:
+    ITensorInfo::Id next_tensor_id();
+
+    GpuLanguage _gpu_language;
+    CLCompileContext *_cl_compile_ctx;
+
+    ITensorInfo::Id _next_tensor_id;
+    MemoryDescriptorMap _mem_map;
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADCONTEXTIMPL_H
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index 33f672071d..d3a20c0dfe 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -43,20 +43,6 @@ const GpuWorkloadSketch::Context *GpuWorkloadSketch::gpu_context() const
     return _impl->context();
 }
 
-void GpuWorkloadSketch::register_new_tensor(ITensorInfo &tensor_info)
-{
-    tensor_info.set_id(_impl->allocate_new_tensor_id());
-    // All input output tensors are User tensors that need real backing memory
-    _impl->register_memory_descriptor(tensor_info, MemoryDescriptor{ MemoryType::User });
-}
-
-TensorInfo GpuWorkloadSketch::create_tensor_info()
-{
-    TensorInfo tensor_info{};
-    register_new_tensor(tensor_info);
-    return tensor_info;
-}
-
 GpuWorkloadSketch::Implementation &GpuWorkloadSketch::implementation()
 {
     return *_impl;
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index d5075d5c94..44c99e844b 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
 #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
 #include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
@@ -53,8 +54,7 @@ public:
           _comp_services{},
           _component_graph{ &_comp_services },
           _operator_group{},
-          _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() },
-          _mem_map{}
+          _managed_tensor_info_list{ std::vector<std::unique_ptr<TensorInfo>>() }
     {
     }
     /** Prevent instances of this class from being copy constructed */
@@ -101,7 +101,8 @@ public:
      */
     GpuWorkloadSourceCode generate_source_code() const
     {
-        return component_graph().fuse(_mem_map).write_workload_code();
+        const auto mem_map = _context->implementation().mem_map();
+        return component_graph().fuse(mem_map).write_workload_code();
     }
     /** Create a virtual (see @ref MemoryType) tensor info and save it
     *
@@ -110,8 +111,7 @@ public:
    ITensorInfo *create_virtual_tensor()
    {
        auto uptr = std::make_unique<TensorInfo>();
-        uptr->set_id(-allocate_new_tensor_id()); // virtual tensors must have negative id
-        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Virtual });
+        _context->implementation().register_virtual_tensor(*uptr);
         _managed_tensor_info_list.emplace_back(std::move(uptr));
         return _managed_tensor_info_list.back().get();
     }
@@ -129,20 +129,10 @@ public:
     ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info)
     {
         auto uptr = std::make_unique<TensorInfo>(tensor_info);
-        uptr->set_id(allocate_new_tensor_id());
-        register_memory_descriptor(*uptr, MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ uptr->total_size() } });
+        _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() });
         _managed_tensor_info_list.emplace_back(std::move(uptr));
         return _managed_tensor_info_list.back().get();
     }
-    /** Register memory descriptor of a tensor info
-     *
-     * @param[in] info     @ref ITensorInfo to be registered
-     * @param[in] mem_desc @ref MemoryDescriptor to be registered with @p info
-     */
-    void register_memory_descriptor(const ITensorInfo &info, const MemoryDescriptor &mem_desc)
-    {
-        _mem_map[info.id()] = mem_desc;
-    }
 
 private:
     Context *_context;
@@ -151,7 +141,6 @@ private:
     GpuComponentServices _comp_services;
     GpuKernelComponentGraph _component_graph;
     GpuOperatorGroup _operator_group;
     ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id };
     std::vector<std::unique_ptr<TensorInfo>> _managed_tensor_info_list;
-    MemoryDescriptorMap _mem_map;
 };
 } // namespace dynamic_fusion
 } // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index 291a1e5bda..ffc4553a7d 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -136,9 +136,9 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
     ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
     ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
     TensorShape logits_sum_shape = src->tensor_shape();
-    ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_tensor_shape(logits_sum_shape));
+    ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
     logits_sum_shape.set(0, 1);
-    ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_tensor_shape(logits_sum_shape));
+    ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
 
     // Auto initialize dst tensor info and the auxiliary tensor infos as well
     auto_init_if_empty(*dst, *src->clone());
--
cgit v1.2.1
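
Usage note: the practical effect of this patch is that one GpuWorkloadContext can back several GpuWorkloadSketch objects, because tensor IDs and memory descriptors now live in the context's mem_map rather than in any single sketch. The snippet below is a minimal sketch of that flow. It assumes the register_user_tensor API added by this patch; the GpuAdd/GpuOutput operators, the TensorInfo constructor arguments, and the function name multi_sketch_example come from the wider library and are illustrative assumptions, not part of this patch.

```cpp
// Minimal multi-sketch sketch. register_user_tensor is from this patch;
// GpuAdd/GpuOutput signatures are assumed from the wider ComputeLibrary API.
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

void multi_sketch_example(CLCompileContext *cl_compile_ctx)
{
    // One context owns every tensor ID and memory descriptor.
    GpuWorkloadContext context{ cl_compile_ctx };

    TensorInfo in_0{ TensorShape(32U, 32U), 1, DataType::F32 };
    TensorInfo in_1{ TensorShape(32U, 32U), 1, DataType::F32 };
    TensorInfo out_0{}; // auto-initialized by the operators
    TensorInfo out_1{};

    // IDs are assigned once by the context (positive for user tensors),
    // so the same tensor infos stay valid across sketches.
    context.register_user_tensor(in_0);
    context.register_user_tensor(in_1);
    context.register_user_tensor(out_0);
    context.register_user_tensor(out_1);

    // First sketch: fuse an addition and materialize it into out_0.
    GpuWorkloadSketch sketch_0{ &context };
    ITensorInfo *add_0 = GpuAdd::create_op(sketch_0, &in_0, &in_1);
    GpuOutput::create_op(sketch_0, add_0, &out_0);

    // Second sketch on the same context: consumes out_0, which keeps the
    // ID the context gave it, with no re-registration needed.
    GpuWorkloadSketch sketch_1{ &context };
    ITensorInfo *add_1 = GpuAdd::create_op(sketch_1, &out_0, &in_1);
    GpuOutput::create_op(sketch_1, add_1, &out_1);
}
```

Auxiliary and virtual tensors follow the same scheme through register_aux_tensor and register_virtual_tensor (virtual tensors get negated IDs), so generate_source_code() on any sketch can pull the complete MemoryDescriptorMap from the shared context.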