From 23882a9014eb3972bca958206866c8e0d0b829cc Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 28 Jun 2023 09:49:45 +0100 Subject: Add GpuKernelArgumentBinding for runtime argument setting * Add flexible runtime argument setting that accept argument bindings exported from ckw. * Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in dynamic fusion validation tests. Otherwise it will not be tested and the dynamic fusion will keep using ClTemplateWriter instead. * Fix CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions Resolves: COMPMID-6282 Partially resolves: COMPMID-6260 Signed-off-by: SiCong Li Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Reviewed-by: Anitha Raj Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 128 ++++++++++++++++++++- .../sketch/gpu/GpuKernelComponentGraph.cpp | 6 +- .../sketch/gpu/GpuKernelComponentGraph.h | 10 +- .../sketch/gpu/GpuKernelComponentStream.cpp | 8 +- .../sketch/gpu/GpuKernelComponentStream.h | 6 +- .../sketch/gpu/GpuKernelSourceCode.h | 16 ++- src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 16 ++- .../sketch/gpu/GpuWorkloadContext.cpp | 43 ++++--- .../sketch/gpu/GpuWorkloadContextImpl.h | 30 +++-- .../sketch/gpu/GpuWorkloadSketchImpl.h | 44 +++---- .../sketch/gpu/GpuWorkloadSourceCode.h | 105 ++++++++++++++++- src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 20 +++- .../sketch/gpu/ckw_driver/GpuCkwDriver.cpp | 45 +++++--- .../sketch/gpu/ckw_driver/GpuCkwDriver.h | 7 +- .../sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp | 8 +- .../sketch/gpu/ckw_driver/GpuCkwVariableTable.h | 7 +- .../gpu/ckw_driver/components/GpuCkwActivation.cpp | 31 ++--- .../gpu/ckw_driver/components/GpuCkwCast.cpp | 21 ++-- .../components/GpuCkwElementwiseBinary.cpp | 33 +++--- .../gpu/ckw_driver/components/GpuCkwStore.cpp | 5 +- .../ckw_driver/components/utils/TypeConverter.h | 98 ++++++++++++++++ .../sketch/gpu/components/IGpuKernelComponent.h | 8 +- .../gpu/components/cl/ClComponentActivation.cpp | 26 +++-- .../gpu/components/cl/ClComponentActivation.h | 16 ++- .../sketch/gpu/components/cl/ClComponentCast.cpp | 26 +++-- .../sketch/gpu/components/cl/ClComponentCast.h | 14 ++- .../components/cl/ClComponentElementwiseBinary.cpp | 27 +++-- .../components/cl/ClComponentElementwiseBinary.h | 21 +++- .../sketch/gpu/components/cl/ClComponentStore.cpp | 26 ++++- .../sketch/gpu/components/cl/ClComponentStore.h | 16 ++- 30 files changed, 668 insertions(+), 199 deletions(-) (limited to 'src/dynamic_fusion/sketch') diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h index 302d4c8562..226e1a2df3 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h @@ -33,6 +33,7 @@ namespace experimental namespace dynamic_fusion { /** Contain information required to set up a kernel argument at run time + * @deprecated To be removed along with ClTemplateWriter */ struct GpuKernelArgumentInfo { @@ -66,10 +67,9 @@ struct GpuKernelArgumentInfo } Type type{ Type::Tensor_4D_t_Buffer }; }; - bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1); - /** Kernel argument information linked with its corresponding @ref ITensorInfo + * @deprecated To be removed along with ClTemplateWriter */ class GpuKernelArgument { @@ -124,6 +124,130 @@ private: TensorInfo _tensor_info{}; GpuKernelArgumentInfo _kernel_arg_info{}; }; +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF +/** Describe how the tensor runtime memory can be accessed + * + * Please see documentation under @ref GpuKernelArgumentBinding + */ +enum class TensorStorageType +{ + Unknown, + ClBufferUint8Ptr, + ClImage2dReadOnly, + ClImage2dWriteOnly, +}; + +/** Describe additional runtime information about the tensor + * + * Please see documentation under @ref GpuKernelArgumentBinding + */ +enum class TensorComponentType +{ + Unknown, + OffsetFirstElement, + Stride0, + Stride1, + Stride2, + Stride3, + Stride4, + Dim0, + Dim1, + Dim2, + Dim3, + Dim4, + Dim1xDim2, + Dim2xDim3, + Dim1xDim2xDim3, +}; + +/** Describe how to extract information from a runtime Gpu tensor, and set it as an argument to a gpu kernel at runtime + * + * A kernel argument is just an argument to the gpu kernel as shown in the argument list below. This contrasts with a "workload argument" which is a tensor (@ref GpuWorkloadArgument) + * void kernel(arg0, arg1, ... argN) + * + * In a kernel generated using dynamic fusion (@ref GpuKernelSourceCode), every kernel argument describes part of a tensor. + * A tensor is described as: **storages** followed by **components** + * + * A storage (@ref TensorStorageType) describes how the tensor runtime memory can be accessed (e.g. via a global uint8 pointer to a CL buffer) + * A component (@ref TensorComponentType) describes additional runtime information about the tensor (e.g. the dimensions of the tensor) + * + * The arguments are arranged in the order of use in the generated kernel code: + * + * arg0 , arg1 , arg2 , ..., , argN + * storage, component0, component1, ..., componentX, storage, component0, component1, ..., componentY + * | tensor0 | tensor1 | + * + * An example argument list: + * + * void kernel( + * image2d_t t0_image, // TensorStorageType::ClImage2dReadOnly + * uint8_t* t0_ptr, // TensorStorageType::ClBufferUint8Ptr + * uint t0_dim0, // TensorComponentType::Dim0 + * uint t0_stride1, // TensorComponentType::Stride1 + * image2d_t t1_ptr, // TensorStorageType::ClImage2dReadOnly + * uint t1_dim1xdim2, // TensorComponentType::Dim1xDim2 + * uint t1_stride1, // TensorComponentType::Stride1 + * uint t1_stride2, // TensorComponentType:Stride2 + * ) + * + */ +class GpuKernelArgumentBinding +{ +public: + enum class Type : int32_t + { + TensorStorage, /** @ref TensorStorageType */ + TensorComponent /** @ref TensorComponentType */ + }; + GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage) + : _type{ Type::TensorStorage }, _id{ id }, _value{} + { + _value.tensor_storage_type = storage; + } + GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component) + : _type{ Type::TensorComponent }, _id{ id }, _value{} + { + _value.tensor_component_type = component; + } + /** Storage type of the tensor + */ + TensorStorageType tensor_storage_type() const + { + ARM_COMPUTE_ERROR_ON(_type != Type::TensorStorage); + return _value.tensor_storage_type; + } + /** Component of the tensor + */ + TensorComponentType tensor_component_type() const + { + ARM_COMPUTE_ERROR_ON(_type != Type::TensorComponent); + return _value.tensor_component_type; + } + /** Id of the tensor this kernel argument belongs to + */ + ITensorInfo::Id id() const + { + return _id; + } + /** Type of the kernel argument + */ + Type type() const + { + return _type; + } + +private: + Type _type; + ITensorInfo::Id _id; + union Value + { + TensorStorageType tensor_storage_type; + TensorComponentType tensor_component_type; + }; + Value _value; +}; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF + } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp index b70a192775..5a65ede38b 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp @@ -44,14 +44,14 @@ std::vector GpuKernelComponentGraph::get_tensor_ids(c return tensor_ids; } -GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services) - : _services{ services }, _components{}, _tensors{}, _dependency_graph{} +GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services) + : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{} { } GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const { - GpuKernelComponentStream stream{ _services, mem_map }; + GpuKernelComponentStream stream{ _context, _services, mem_map }; const auto op_seq = _dependency_graph.build_operators_sequence(); stream.new_component_group(); diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h index 8314ea0a50..85c9b45840 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" @@ -49,9 +49,10 @@ class GpuKernelComponentGraph public: /** Constructor * + * @param[in] context @ref GpuWorkloadContext to be used by the graph * @param[in] services @ref GpuComponentServices to be used by the graph */ - GpuKernelComponentGraph(GpuComponentServices *services); + GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services); /** Prevent instances of this class from being copy constructed */ GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete; /** Prevent instances of this class from being copied */ @@ -98,6 +99,7 @@ public: private: static std::vector get_tensor_ids(const std::vector tensors); + GpuWorkloadContext *_context; GpuComponentServices *_services; std::map> _components; std::map _tensors; @@ -106,4 +108,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */ diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp index 8f4eadc477..a2b6623370 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,8 +33,8 @@ namespace experimental { namespace dynamic_fusion { -GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map) - : _services{ services }, _component_groups{}, _mem_map{ mem_map } +GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map) + : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map } { } @@ -51,7 +51,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code() const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code(); // The whole unit workload stage is determined by the root component const auto unit_workload_stage = group.get_root_component()->properties().stage(); - source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map); + source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map, _context); } return source_code; } diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h index cbaa7c297b..ba2503a938 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,10 +49,11 @@ class GpuKernelComponentStream public: /** Constructor * + * @param[in] context @ref GpuWorkloadContext to be used throughout the stream * @param[in] services @ref GpuComponentServices to be used throughout the stream * @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode */ - GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map); + GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map); /** Allow instances of this class to be copy constructed */ GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default; /** Allow instances of this class to be copied */ @@ -78,6 +79,7 @@ public: bool add_component(IGpuKernelComponent *component); private: + GpuWorkloadContext *_context; GpuComponentServices *_services; std::vector _component_groups{}; MemoryDescriptorMap _mem_map{}; diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h index 7479328d7b..64e1cdc3bc 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include +#else // ACL_INTERNAL_TEST_CKW_IN_DF +#include +#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include namespace arm_compute @@ -38,7 +42,11 @@ namespace experimental namespace dynamic_fusion { /** The argument list of a @ref GpuKernelSourceCode */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF using GpuKernelArgumentList = std::map; +#else // ACL_INTERNAL_TEST_CKW_IN_DF +using GpuKernelArgumentList = std::deque; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload */ @@ -123,4 +131,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp index 00f625de28..c99984fc0e 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,11 @@ #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h" +#else // ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" +#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -46,11 +50,19 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() { GpuKernelSourceCode code; - ClTemplateWriter writer{ _comp_group }; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + ClTemplateWriter writer { _comp_group }; +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuCkwDriver writer { _comp_group }; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.name(writer.get_name()); code.code(writer.get_code()); +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF code.arguments(writer.get_tensors()); +#else // ACL_INTERNAL_TEST_CKW_IN_DF + code.arguments(writer.get_kernel_arguments()); +#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.build_options(writer.get_build_options()); code.config_id(writer.get_config_id()); code.window(writer.get_window()); diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp index 50f34d9c14..c2bd012703 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp @@ -32,9 +32,8 @@ namespace experimental { namespace dynamic_fusion { - GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx) - : _impl { std::make_unique(GpuLanguage::OpenCL, cl_compile_ctx) } + : _impl{ std::make_unique(GpuLanguage::OpenCL, cl_compile_ctx) } { } @@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const } GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx) - : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), - _next_tensor_id(1), _mem_map() + : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info() { } @@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info) tensor_info.set_id(tensor_id); _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User }; + // Save a *copy* of the user tensor info in workload context for future reference + // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context + _managed_tensor_info.emplace(tensor_info.id(), std::make_unique(tensor_info)); } -void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info) +ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor() { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); - - const auto tensor_id = next_tensor_id(); - - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info }; + auto tensor_info = std::make_unique(); + const auto tensor_id = -next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); } -void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info) +ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info) { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); + auto tensor_info = std::make_unique(itensor_info); + const auto tensor_id = next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); +} - const auto tensor_id = -next_tensor_id(); +ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) +{ + return _managed_tensor_info.at(id).get(); +} - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; +const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const +{ + return _managed_tensor_info.at(id).get(); } ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id() diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h index a857932791..c169476a70 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h @@ -36,7 +36,6 @@ namespace experimental { namespace dynamic_fusion { - /** Internal implementation of workload context. */ class GpuWorkloadContext::Impl { @@ -52,7 +51,7 @@ public: Impl(Impl &) = default; /** Assignment */ - Impl& operator=(Impl &) = default; + Impl &operator=(Impl &) = default; /** Get target GPU language. */ GpuLanguage gpu_language() const; @@ -69,27 +68,34 @@ public: */ void register_user_tensor(ITensorInfo &tensor_info); - /** Set a new ID and register the auxiliary tensor info. + /** Create a virtual (see @ref MemoryType) tensor info and save it * - * @param[in, out] tensor_info The tensor info to be registered. - * @param[in] mem_info The auxiliary tensor memory info. + * @return ITensorInfo* The created virtual tensor info object pointer */ - void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info); - - /** Set a new ID and register the virtual tensor info. + ITensorInfo *create_virtual_tensor(); + /** Create an auxiliary (see @ref MemoryType) tensor info and save it * - * @param[in, out] tensor_info The tensor info to be registered. + * @param[in] tensor_info @ref ITensorInfo to copy from + * + * @return ITensorInfo* The created auxiliary tensor info object pointer */ - void register_virtual_tensor(ITensorInfo &tensor_info); + ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info); + + /** Get tensor info created by this context, from id */ + ITensorInfo *get_tensor_info(ITensorInfo::Id id); + + /** Get tensor info created by this context, from id */ + const ITensorInfo *get_tensor_info(ITensorInfo::Id id) const; private: ITensorInfo::Id next_tensor_id(); - GpuLanguage _gpu_language; + GpuLanguage _gpu_language; CLCompileContext *_cl_compile_ctx; - ITensorInfo::Id _next_tensor_id; + ITensorInfo::Id _next_tensor_id; MemoryDescriptorMap _mem_map; + std::map> _managed_tensor_info; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h index 44c99e844b..d3033898e9 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h @@ -26,13 +26,10 @@ #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h" #include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h" - -#include -#include +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" namespace arm_compute { @@ -52,9 +49,8 @@ public: Context *context) : _context{ context }, _comp_services{}, - _component_graph{ &_comp_services }, - _operator_group{}, - _managed_tensor_info_list{ std::vector>() } + _component_graph{ _context, &_comp_services }, + _operator_group{} { } /** Prevent instances of this class from being copy constructed */ @@ -90,10 +86,6 @@ public: { return _operator_group; } - ITensorInfo::Id allocate_new_tensor_id() - { - return ++_next_id; - } /** Generate @ref GpuWorkloadSourceCode from the workload sketch * @note The sketch must be valid. Any error encountered during the building of the code will be thrown. * @@ -110,16 +102,8 @@ public: */ ITensorInfo *create_virtual_tensor() { - auto uptr = std::make_unique(); - _context->implementation().register_virtual_tensor(*uptr); - _managed_tensor_info_list.emplace_back(std::move(uptr)); - return _managed_tensor_info_list.back().get(); + return _context->implementation().create_virtual_tensor(); } - /** Create an auxiliary (see @ref MemoryType) tensor info and save it - * - * @return ITensorInfo* The created auxiliary tensor info object pointer - */ - /** Create an auxiliary (see @ref MemoryType) tensor info and save it * * @param[in] tensor_info @ref ITensorInfo to copy from @@ -128,19 +112,19 @@ public: */ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info) { - auto uptr = std::make_unique(tensor_info); - _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() }); - _managed_tensor_info_list.emplace_back(std::move(uptr)); - return _managed_tensor_info_list.back().get(); + return _context->implementation().create_auxiliary_tensor(tensor_info); + } + + ITensorInfo *get_tensor_info(ITensorInfo::Id id) + { + return _context->implementation().get_tensor_info(id); } private: - Context *_context; - GpuComponentServices _comp_services; - GpuKernelComponentGraph _component_graph; - GpuOperatorGroup _operator_group; - ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id }; - std::vector> _managed_tensor_info_list; + Context *_context; + GpuComponentServices _comp_services; + GpuKernelComponentGraph _component_graph; + GpuOperatorGroup _operator_group; }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h index d1d0bdf77f..578366daaf 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h @@ -27,6 +27,7 @@ #include "arm_compute/core/experimental/Types.h" #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" namespace arm_compute { @@ -34,10 +35,45 @@ namespace experimental { namespace dynamic_fusion { +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF +namespace +{ +/** Extract kernel arguments of one tensor from a flat list of kernel arguments. + * + * @param[in] flat_kernel_args + * @return GpuKernelArgumentList + */ +GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args) +{ + if(flat_kernel_args.empty()) + { + return {}; + } + GpuKernelArgumentList tensor_kargs{}; + + const GpuKernelArgumentBinding &karg_head = flat_kernel_args.front(); + tensor_kargs.push_back(karg_head); + flat_kernel_args.pop_front(); + const auto tensor_id = karg_head.id(); + + while(!flat_kernel_args.empty()) + { + const GpuKernelArgumentBinding &karg = flat_kernel_args.front(); + if(karg.id() != tensor_id) // Encounter the next tensor, return the current tensor's kernel arguments + { + return tensor_kargs; + } + tensor_kargs.push_back(karg); + flat_kernel_args.pop_front(); + } + return tensor_kargs; +} +} +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */ using UnitWorkloadId = int32_t; -/** Describes all the info related to a kernel in order to: +/** Describes all the info related to a **workload argument** (tensor) in order to: * - be used by runtime to configure gpu kernel argument * - be used by memory managers to allocate required memory */ @@ -46,6 +82,7 @@ class GpuWorkloadArgument public: /** Default constructor */ GpuWorkloadArgument() = default; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Constructor * * @param[in] tensor_info @ref ITensorInfo of the workload argument @@ -60,6 +97,22 @@ public: _kernel_arg_info{ kernel_arg_info } { } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Constructor + * + * @param[in] tensor_info @ref ITensorInfo of the workload argument + * @param[in] mem_desc @ref MemoryDescriptor of the workload argument + * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument + */ + GpuWorkloadArgument(const ITensorInfo &tensor_info, + const MemoryDescriptor &mem_desc, + const GpuKernelArgumentList &kernel_args) + : _tensor_info{ tensor_info }, + _mem_desc{ mem_desc }, + _kernel_args{ kernel_args } + { + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get tensor id within workload */ ITensorInfo::Id id() const { @@ -85,6 +138,7 @@ public: { return &_mem_desc; } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Get @ref GpuKernelArgumentInfo of the argument */ GpuKernelArgumentInfo *kernel_argument_info() { @@ -95,6 +149,18 @@ public: { return &_kernel_arg_info; } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Get @ref GpuKernelArgumentList of the workload tensor */ + GpuKernelArgumentList *kernel_argument_list() + { + return &_kernel_args; + } + /** Get @ref GpuKernelArgumentList of the workload tensor */ + const GpuKernelArgumentList *kernel_argument_list() const + { + return &_kernel_args; + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Check if the workload argument has valid id * * @return true If has valid id @@ -106,9 +172,13 @@ public: } private: - TensorInfo _tensor_info{}; - MemoryDescriptor _mem_desc{}; - GpuKernelArgumentInfo _kernel_arg_info{}; + TensorInfo _tensor_info{}; + MemoryDescriptor _mem_desc{}; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentInfo _kernel_arg_info {}; +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList _kernel_args {}; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; /** Describes when a unit workload is run. @@ -179,15 +249,18 @@ public: * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload * @param[in] stage Stage of the unit workload * @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload + * @param[in] context @ref GpuWorkloadContext associated with the unit workload * * @return UnitWorkloadId Allocated unit workload id */ - UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map) + UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context) { // Use the size of the kernel codes as Id const auto uwk_id = static_cast(_unit_workloads.size()); const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage); _unit_workloads.push_back(unit_work); +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + ARM_COMPUTE_UNUSED(context); // Assemble kernel argument with memory descriptor to form workload argument for(const auto &id_arg : kernel_code.arguments()) { @@ -200,6 +273,28 @@ public: } _tensor_uwork_map[arg_id].insert(uwk_id); } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList flat_kernel_args = kernel_code.arguments(); + GpuKernelArgumentList tensor_kargs{}; + while(true) + { + tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args); + if(tensor_kargs.empty()) + { + break; + } + else + { + const auto tensor_id = tensor_kargs.at(0).id(); + _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs }; + if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end()) + { + _tensor_uwork_map[tensor_id] = std::set(); + } + _tensor_uwork_map[tensor_id].insert(uwk_id); + } + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF return uwk_id; } /** Get a unit workload from its id */ diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h index ae67790b4b..28e5432224 100644 --- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h +++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,9 +27,11 @@ #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include #include +#include namespace arm_compute { @@ -56,8 +58,20 @@ public: virtual std::string get_config_id() = 0; /** Generate execution window */ virtual Window get_window() const = 0; - /** Get the kernel argument lists of the kernel*/ - virtual std::map get_tensors() = 0; + /** Get the kernel argument lists of the kernel + * @deprecated To be removed along with ClTemplateWriter + */ + virtual std::map get_tensors() + { + return {}; + } +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF + /** Get the flat list of arguments of the kernel*/ + virtual GpuKernelArgumentList get_kernel_arguments() + { + return {}; + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp index d5c03c60c5..d78956f835 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Window.h" #include "src/common/utils/Log.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" @@ -42,29 +43,24 @@ namespace experimental namespace dynamic_fusion { GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components) - : _components{ components } + : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL } { } std::string GpuCkwDriver::get_name() { ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - return "todo_get_name"; + return "unnamed"; } std::string GpuCkwDriver::get_code() { - ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL); - GpuCkwKernelWriter root_writer(kernel); + _kernel.name(get_name()); + GpuCkwKernelWriter root_writer(_kernel); GpuCkwScopedKernelWriter writer(&root_writer); GpuCkwVariableTable vtable{}; // Global Kernel Writer Driver code - - // The following is just an incomplete example of using the kernel writer - - // Iterate over component specific Ckw Driver; generate component code and concatenate them for(auto &comp : _components) { auto ckw_driver = comp->ckw_component_driver(); @@ -96,18 +92,31 @@ Window GpuCkwDriver::get_window() const return root_comp->ckw_component_driver()->get_window(); } -std::map GpuCkwDriver::get_tensors() +GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments() { - ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - // Assemble GpuKernelArguments - std::map tensors; - for(const auto t : _components.get_argument_tensors()) + GpuKernelArgumentList args{}; + for(const auto &arg : _kernel.arguments()) { - tensors.emplace( - t->id(), - GpuKernelArgument{ *t, { GpuKernelArgumentInfo::Type::Tensor_Special_0 } }); + switch(arg.type()) + { + case KernelArgument::Type::TensorStorage: + { + args.emplace_back(static_cast(arg.id()), from_ckw(arg.tensor_storage_type())); + break; + } + case KernelArgument::Type::TensorComponent: + { + args.emplace_back(static_cast(arg.id()), from_ckw(arg.tensor_component_type())); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported KernelArgument Type"); + break; + } + } } - return tensors; + return args; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h index 2084b72098..c6e03f6e03 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h @@ -28,6 +28,8 @@ #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" +#include "ckw/Kernel.h" + #include #include @@ -66,11 +68,12 @@ public: std::string get_config_id() override; /** Generate execution window */ Window get_window() const override; - /** Get the kernel argument lists of the kernel*/ - std::map get_tensors() override; + /** Get the flat list of arguments of the kernel*/ + GpuKernelArgumentList get_kernel_arguments() override; private: GpuKernelComponentGroup _components{}; + ckw::Kernel _kernel; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp index 154968775c..6f3eca711d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp @@ -23,9 +23,10 @@ */ #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include @@ -35,7 +36,8 @@ namespace experimental { namespace dynamic_fusion { -GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias) +GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage, + const std::string &alias) { ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); @@ -59,7 +61,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo std::stringstream ss; ss << alias << "_t" << abs(tensor->id()); const auto uniq_name = ss.str(); - GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) }; + GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) }; auto &&inserted = _vars.emplace(tensor->id(), var); return &(inserted.first->second); } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h index 1c9cb083ea..0649dcba9d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h @@ -24,8 +24,8 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" #include "arm_compute/core/ITensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" #include @@ -37,6 +37,7 @@ namespace dynamic_fusion { class GpuKernelComponentGroup; class GpuCkwScopedKernelWriter; +enum class TensorStorageType; /** A table of all the variables used in the kernel. * @@ -52,11 +53,13 @@ public: * @param[in] comp_group Component group the tensor belongs to * @param[in] writer Compute Kernel Writer * @param[in] tensor Tensor info with which the new variable is associated + * @param[in] storage Tensor storage type associated with the tensor * @param[in] alias Alias for the variable. Will be used as part of the variable name * * @return GpuCkwComponentArgument* */ - GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed"); + GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage, + const std::string &alias = "unnamed"); private: std::map _vars{}; diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp index 224c176a31..c07fac0e0d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwActivation.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwActivation::GpuCkwActivation(ComponentId id, - const ArgumentPack &tensors, - const Attributes &attributes) + const ArgumentPack &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler); @@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp auto &dst_tile = dst->tile(); // Constants - const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); - const auto &constant_pos_1 = writer->declare_tile("one", 1); - const auto &constant_zero = writer->declare_tile("zero", 0); - const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); - const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); + const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); + const auto &constant_pos_1 = writer->declare_tile("one", 1); + const auto &constant_zero = writer->declare_tile("zero", 0); + const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); + const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); // Perform the operation. - switch (_attributes.activation()) + switch(_attributes.activation()) { case ActivationLayerInfo::ActivationFunction::LOGISTIC: { @@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp index dd71c55df2..8d7e6a8c37 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwCast.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwCast::GpuCkwCast(ComponentId id, - const ArgumentPack &tensors, - const Attributes &attributes) + const ArgumentPack &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the source tile and prepare the sampler. if(!src->has_tile()) @@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa if(!dst->has_tile()) { // Get Target datatype and convert it to ckw::DataType. - ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); + ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); // Create dst_tile based on src_tile dimensions and with target DataType. const TileInfo src_tile_info = src_tile.tile_info(); @@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index 685bf391dc..15e32e26d5 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -23,14 +23,16 @@ */ #include "GpuCkwElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include @@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer, auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); - auto &const_0 = writer->declare_tile("0", 0); - writer->op_get_global_id(gid_0, 0); writer->op_get_global_id(gid_1, 1); writer->op_get_global_id(gid_2, 2); - sampler.x(gid_0); - sampler.y(gid_1); + auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32); + auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32); + auto &m0_t = writer->declare_tile("m0", m0); + auto &n0_t = writer->declare_tile("n0", n0); + writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t); + writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t); + + sampler.x(x_coord); + sampler.y(y_coord); + auto &const_0 = writer->declare_tile("0", 0); sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension sampler.b(gid_2); @@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); - GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); + GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the LHS and RHS tiles and prepare the tensor sampler. load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); @@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - // constexpr unsigned int vector_size_byte_opencl = 16; - // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp index 63555e6064..247d1b834f 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp @@ -24,6 +24,7 @@ #include "GpuCkwStore.h" #include "arm_compute/core/Error.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" @@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack &tensor } void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { - auto src = vtable.declare_variable(comp_group, writer, _src, "src"); - auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); auto &src_tile = src->tile(); const auto &sampler = src->tile_sampler(); diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h index 9027bddd76..8a38d67d80 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "ckw/TensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" namespace arm_compute { @@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info) tensor_info.id() }; } + +inline TensorComponentType from_ckw(const ckw::TensorComponentType &component) +{ + switch(component) + { + case ckw::TensorComponentType::OffsetFirstElement: + return TensorComponentType::OffsetFirstElement; + break; + case ckw::TensorComponentType::Stride0: + return TensorComponentType::Stride0; + break; + case ckw::TensorComponentType::Stride1: + return TensorComponentType::Stride1; + break; + case ckw::TensorComponentType::Stride2: + return TensorComponentType::Stride2; + break; + case ckw::TensorComponentType::Stride3: + return TensorComponentType::Stride3; + break; + case ckw::TensorComponentType::Stride4: + return TensorComponentType::Stride4; + break; + case ckw::TensorComponentType::Dim0: + return TensorComponentType::Dim0; + break; + case ckw::TensorComponentType::Dim1: + return TensorComponentType::Dim1; + break; + case ckw::TensorComponentType::Dim2: + return TensorComponentType::Dim2; + break; + case ckw::TensorComponentType::Dim3: + return TensorComponentType::Dim3; + break; + case ckw::TensorComponentType::Dim4: + return TensorComponentType::Dim4; + break; + case ckw::TensorComponentType::Dim1xDim2: + return TensorComponentType::Dim1xDim2; + break; + case ckw::TensorComponentType::Dim2xDim3: + return TensorComponentType::Dim2xDim3; + break; + case ckw::TensorComponentType::Dim1xDim2xDim3: + return TensorComponentType::Dim1xDim2xDim3; + break; + case ckw::TensorComponentType::Unknown: + return TensorComponentType::Unknown; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor component"); + return TensorComponentType::Unknown; + } +} + +inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage) +{ + switch(storage) + { + case TensorStorageType::ClBufferUint8Ptr: + return ckw::TensorStorageType::BufferUint8Ptr; + break; + case TensorStorageType::ClImage2dReadOnly: + return ckw::TensorStorageType::Texture2dReadOnly; + break; + case TensorStorageType::ClImage2dWriteOnly: + return ckw::TensorStorageType::Texture2dWriteOnly; + break; + case TensorStorageType::Unknown: + return ckw::TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown tensor storage type"); + return ckw::TensorStorageType::Unknown; + } +} +inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage) +{ + switch(storage) + { + case ckw::TensorStorageType::BufferUint8Ptr: + return TensorStorageType::ClBufferUint8Ptr; + break; + case ckw::TensorStorageType::Texture2dReadOnly: + return TensorStorageType::ClImage2dReadOnly; + break; + case ckw::TensorStorageType::Texture2dWriteOnly: + return TensorStorageType::ClImage2dWriteOnly; + break; + case ckw::TensorStorageType::Unknown: + return TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor storage type"); + return TensorStorageType::Unknown; + } +} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h index d600956b4f..af766a7ece 100644 --- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h +++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h @@ -104,9 +104,11 @@ public: { return _properties; } - /** Get template writer for the component */ - virtual const IGpuTemplateComponentWriter *template_writer() const = 0; - /** Get compute kernel writer driver for the component */ + /** Get writer for the component */ + virtual const IGpuTemplateComponentWriter *template_writer() const + { + return nullptr; + } virtual const IGpuCkwComponentDriver *ckw_component_driver() const { return nullptr; diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp index d2cde40a10..c41257d18c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp @@ -24,8 +24,11 @@ #include "ClComponentActivation.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -66,8 +69,17 @@ ClComponentActivation::ClComponentActivation(ComponentId const ArgumentPack &tensors, const Attributes &attributes) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -75,15 +87,15 @@ ClComponentActivation::~ClComponentActivation() { } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } -const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const -{ - return _ckw_driver.get(); -} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h index bb6f7c6e30..ebe8719420 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h @@ -42,8 +42,11 @@ template class ArgumentPack; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateActivation; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwActivation; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentActivation final : public IGpuKernelComponent { @@ -106,10 +109,12 @@ public: /** Allow instances of this class to be moved */ ClComponentActivation &operator=(ClComponentActivation &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - - const IGpuCkwComponentDriver *ckw_component_driver() const override; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -118,8 +123,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp index 92933ae7a5..635869f817 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp @@ -26,8 +26,11 @@ #include "arm_compute/core/Error.h" #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -67,23 +70,32 @@ ClComponentCast::ClComponentCast(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } ClComponentCast::~ClComponentCast() { } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } -const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const -{ - return _ckw_driver.get(); -} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h index 174f9670b3..37b8cbb6c9 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h @@ -48,8 +48,11 @@ private: }; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateCast; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwCast; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentCast final : public IGpuKernelComponent { @@ -116,10 +119,12 @@ public: ClComponentCast(ClComponentCast &&component) = default; /** Allow instances of this class to be moved */ ClComponentCast &operator=(ClComponentCast &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - /** Get GPU kernel writer for the component */ +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -127,8 +132,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 52739e23c0..88d729170c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -25,8 +25,11 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -106,28 +109,38 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack &t return Status{}; } +ClComponentElementwiseBinary::~ClComponentElementwiseBinary() +{ +} ClComponentElementwiseBinary::ClComponentElementwiseBinary( ComponentId id, const Properties &properties, const ArgumentPack &tensors, const Attributes &attributes) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors, attributes) } -ClComponentElementwiseBinary::~ClComponentElementwiseBinary() +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors, attributes) } -const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _component_writer.get(); } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _ckw_driver.get(); + return _component_writer.get(); } + } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h index a56dd8b37d..f7175903d0 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -40,8 +40,11 @@ template class ArgumentPack; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateElementwiseBinary; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwElementwiseBinary; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentElementwiseBinary final : public IGpuKernelComponent { @@ -101,10 +104,13 @@ public: ClComponentElementwiseBinary(ClComponentElementwiseBinary &&component) = default; /** Allow instances of this class to be moved */ ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -112,10 +118,13 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp index a3283b1866..12b81c3d56 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp @@ -24,8 +24,11 @@ #include "ClComponentStore.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF #include @@ -43,19 +46,30 @@ Status ClComponentStore::validate( return Status{}; } ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack &tensors) - : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique(id, tensors) }, _ckw_driver{ std::make_unique(id, tensors) } + : IGpuKernelComponent{ id, properties, tensors }, +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors) } -ClComponentStore::~ClComponentStore() +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors) } -const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _component_writer.get(); } +ClComponentStore::~ClComponentStore() +{ +} +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _ckw_driver.get(); + return _component_writer.get(); } } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h index f168ccb97e..853ee39012 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h @@ -25,7 +25,6 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" #include namespace arm_compute @@ -39,7 +38,11 @@ namespace dynamic_fusion /** Forward declaration */ template class ArgumentPack; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +class ClTemplateStore; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwStore; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentStore final : public IGpuKernelComponent { @@ -85,10 +88,12 @@ public: ClComponentStore(ClComponentStore &&component) = default; /** Allow instances of this class to be moved */ ClComponentStore &operator=(ClComponentStore &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -96,8 +101,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental -- cgit v1.2.1