From 23882a9014eb3972bca958206866c8e0d0b829cc Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Wed, 28 Jun 2023 09:49:45 +0100 Subject: Add GpuKernelArgumentBinding for runtime argument setting * Add flexible runtime argument setting that accept argument bindings exported from ckw. * Introduce internal build flag ACL_INTERNAL_TEST_CKW_IN_DF. If set to true, ckw will be tested in dynamic fusion validation tests. Otherwise it will not be tested and the dynamic fusion will keep using ClTemplateWriter instead. * Fix CKW sampler for elementwise binary to deal with tile sizes > 1 in both dimensions Resolves: COMPMID-6282 Partially resolves: COMPMID-6260 Signed-off-by: SiCong Li Change-Id: I0ab225a4484eb2119643d900a4e72806558626ee Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9917 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Reviewed-by: Anitha Raj Reviewed-by: Viet-Hoa Do Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- SConscript | 9 +- .../prototype/include/ckw/Kernel.h | 11 ++ compute_kernel_writer/prototype/src/Kernel.cpp | 10 ++ filelist.json | 144 +++++++++++---------- scripts/clang_tidy_rules.py | 3 + .../runtime/gpu/cl/ClKernelRuntime.cpp | 81 ++++++++++-- .../runtime/gpu/cl/ClKernelRuntime.h | 17 ++- .../cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp | 38 +++--- .../cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h | 10 +- src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 128 +++++++++++++++++- .../sketch/gpu/GpuKernelComponentGraph.cpp | 6 +- .../sketch/gpu/GpuKernelComponentGraph.h | 10 +- .../sketch/gpu/GpuKernelComponentStream.cpp | 8 +- .../sketch/gpu/GpuKernelComponentStream.h | 6 +- .../sketch/gpu/GpuKernelSourceCode.h | 16 ++- src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 16 ++- .../sketch/gpu/GpuWorkloadContext.cpp | 43 +++--- .../sketch/gpu/GpuWorkloadContextImpl.h | 30 +++-- .../sketch/gpu/GpuWorkloadSketchImpl.h | 44 ++----- .../sketch/gpu/GpuWorkloadSourceCode.h | 105 ++++++++++++++- src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 20 ++- .../sketch/gpu/ckw_driver/GpuCkwDriver.cpp | 45 ++++--- .../sketch/gpu/ckw_driver/GpuCkwDriver.h | 7 +- .../sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp | 8 +- .../sketch/gpu/ckw_driver/GpuCkwVariableTable.h | 7 +- .../gpu/ckw_driver/components/GpuCkwActivation.cpp | 31 ++--- .../gpu/ckw_driver/components/GpuCkwCast.cpp | 21 +-- .../components/GpuCkwElementwiseBinary.cpp | 33 +++-- .../gpu/ckw_driver/components/GpuCkwStore.cpp | 5 +- .../ckw_driver/components/utils/TypeConverter.h | 98 ++++++++++++++ .../sketch/gpu/components/IGpuKernelComponent.h | 8 +- .../gpu/components/cl/ClComponentActivation.cpp | 26 +++- .../gpu/components/cl/ClComponentActivation.h | 16 ++- .../sketch/gpu/components/cl/ClComponentCast.cpp | 26 +++- .../sketch/gpu/components/cl/ClComponentCast.h | 14 +- .../components/cl/ClComponentElementwiseBinary.cpp | 27 +++- .../components/cl/ClComponentElementwiseBinary.h | 21 ++- .../sketch/gpu/components/cl/ClComponentStore.cpp | 26 +++- .../sketch/gpu/components/cl/ClComponentStore.h | 16 ++- .../dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp | 3 + .../dynamic_fusion/gpu/cl/DirectConv2d.cpp | 3 + tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp | 5 +- tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp | 3 + tests/validation/dynamic_fusion/gpu/cl/Resize.cpp | 3 + tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp | 3 + 45 files changed, 906 insertions(+), 304 deletions(-) diff --git a/SConscript b/SConscript index da7683ed6c..868bc9f8f5 100644 --- a/SConscript +++ b/SConscript @@ -564,7 +564,14 @@ if env['fixed_format_kernels']: # Experimental files # Dynamic fusion if env['experimental_dynamic_fusion']: - lib_files += filelist['experimental']['dynamic_fusion'] + lib_files += filelist['experimental']['dynamic_fusion']['common'] + lib_files += filelist['experimental']['dynamic_fusion']['template_writer'] + +if "ACL_INTERNAL_TEST_CKW_IN_DF" in env["extra_cxx_flags"]: + if not env["experimental_dynamic_fusion"]: + print("To use ACL_INTERNAL_TEST_CKW_IN_DF experimental_dynamic_fusion must be set to 1") + Exit(1) + lib_files += filelist['experimental']['dynamic_fusion']['ckw_driver'] # Logging files if env["logging"]: diff --git a/compute_kernel_writer/prototype/include/ckw/Kernel.h b/compute_kernel_writer/prototype/include/ckw/Kernel.h index 3deb2ace0d..ba31a29ba7 100644 --- a/compute_kernel_writer/prototype/include/ckw/Kernel.h +++ b/compute_kernel_writer/prototype/include/ckw/Kernel.h @@ -48,6 +48,11 @@ class GpuKernelWriterDataHolder; class Kernel { public: + /** Constructor + * + * @param[in] language The programming language to write the kernel. + */ + Kernel(GpuTargetLanguage language); /** Constructor * * @param[in] name The name of the kernel function. @@ -61,6 +66,12 @@ public: /** Get the name of the kernel function. */ const std::string &name() const; + /** Set the name of the kernel function. + * + * @param[in] name The name of the kernel function. + */ + void name(const std::string &name); + /** Get the list of kernel arguments. */ ::std::vector arguments() const; diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp index 884b69afc6..095ac879f1 100644 --- a/compute_kernel_writer/prototype/src/Kernel.cpp +++ b/compute_kernel_writer/prototype/src/Kernel.cpp @@ -30,11 +30,17 @@ namespace ckw { +Kernel::Kernel(GpuTargetLanguage language) + : Kernel{"unnamed", language} +{ +} + Kernel::Kernel(const char *name, GpuTargetLanguage language) : _name(name), _kernel(std::make_unique(language)), _operands{}, _tensor_id_operands{} { } + Kernel::~Kernel() { } @@ -44,6 +50,10 @@ const std::string &Kernel::name() const return _name; } +void Kernel::name(const std::string& name) +{ + _name = name; +} std::vector Kernel::arguments() const { std::vector arguments; diff --git a/filelist.json b/filelist.json index 18fcaa81a0..b7845a760a 100644 --- a/filelist.json +++ b/filelist.json @@ -2286,74 +2286,80 @@ } }, "experimental": { - "dynamic_fusion": [ - "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp", - "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp", - "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", - "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp", - "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp", - "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp", - "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp", - "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp", - "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp", - "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp", - "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp" - ] + "dynamic_fusion": { + "common": [ + "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp", + "src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp", + "src/dynamic_fusion/sketch/attributes/CastAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Conv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", + "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", + "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp", + "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp", + "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp", + "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp", + "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp", + "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp", + "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp" + ], + "template_writer": [ + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", + "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp" + ], + "ckw_driver": [ + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp", + "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp" + ] + } } } diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 11ca6c45f5..1e1ab7f545 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -43,6 +43,9 @@ def get_list_flags( filename, arch): flags.append("-DARM_COMPUTE_OPENCL_ENABLED") if arch == "aarch64": flags.append("-DARM_COMPUTE_AARCH64_V8_2") + if "ckw_driver" in filename: + flags.append("-DACL_INTERNAL_TEST_CKW_IN_DF") + return flags def filter_files( list_files ): diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp index 6a57482bb2..92ca8557f1 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp @@ -24,6 +24,9 @@ #include "ClKernelRuntime.h" #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/CLUtils.h" +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h" +#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include "src/gpu/cl/ClKernelLibrary.h" @@ -57,6 +60,8 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe _arguments = code.arguments(); } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector &cl_images) { ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); @@ -163,21 +168,65 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer } } +#else // ACL_INTERNAL_TEST_CKW_IN_DF +inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector &cl_images) +{ + switch(arg.type()) + { + case GpuKernelArgumentBinding::Type::TensorStorage: + { + switch(arg.tensor_storage_type()) + { + case TensorStorageType::ClBufferUint8Ptr: + { + cl_add_buffer_argument(_kernel, idx, tensor->cl_buffer()); + break; + } + case TensorStorageType::ClImage2dReadOnly: + { + cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::ReadOnly); + cl_images.push_back(tensor_image2d); + cl_add_texture_argument(_kernel, idx, tensor_image2d); + break; + } + case TensorStorageType::ClImage2dWriteOnly: + { + cl::Image2D tensor_image2d = create_image2d_from_tensor(tensor, CLImage2DType::WriteOnly); + cl_images.push_back(tensor_image2d); + cl_add_texture_argument(_kernel, idx, tensor_image2d); + break; + } + default: + { + ARM_COMPUTE_ERROR("Do not accept other TensorStorageType"); + break; + } + } + break; + } + case GpuKernelArgumentBinding::Type::TensorComponent: + { + cl_add_tensor_component_argument(_kernel, idx, tensor, arg.tensor_component_type()); + break; + } + default: + { + ARM_COMPUTE_ERROR("Do not accept other types of kernel arguments"); + break; + } + } +} + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); Window slice = window.first_slice_window_3D(); - // Don't slice matrix along the z dimension if matrix has just 2 dimensions and matrix A more than 2 - // This scenario can happen when the matrix multiplication is used to perform a convolution operation - Window slice_fixed_z = slice; - slice_fixed_z.set(Window::DimX, Window::Dimension(0, 1, 1)); - slice_fixed_z.set(Window::DimY, Window::Dimension(0, 1, 1)); /// NOTE: Parameters extracted from old kernels. So far they seem to be constant /// but we may need to make them into another configuration passed from GpuWorkloadSourceCode if needed in the future - constexpr bool slide_along_dimz = true; constexpr bool skip_sliding_window = false; constexpr bool use_dummy_work_items = false; @@ -185,23 +234,27 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com do { // Set kernel arguments - Window arg_slice = slice; // CLImages created from tensor arguments. Need to be retained until enqueue std::vector cl_images; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF for(auto id_arg : _arguments) { const auto arg = id_arg.second; auto tensor = utils::cast::polymorphic_downcast(tensors.get_tensor(id_arg.first)); ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); - if(!slide_along_dimz) - { - // The stride_z for matrix must be zero if we do not slice - ARM_COMPUTE_ERROR_ON(tensor->info()->strides_in_bytes()[3] != 0); - arg_slice = slice_fixed_z; - } - add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, arg_slice, cl_images); + add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images); + } + +#else // ACL_INTERNAL_TEST_CKW_IN_DF + for(const auto &arg : _arguments) + { + auto tensor = utils::cast::polymorphic_downcast(tensors.get_tensor(arg.id())); + ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); + ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); + add_kernel_argument(idx, arg, tensor, cl_images); } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF // Dispatch kernel enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items); diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h index 4787acabcd..92e73503ce 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,6 +29,8 @@ #include "src/gpu/cl/ClCompileContext.h" #include "src/gpu/cl/IClKernel.h" +#include + namespace arm_compute { namespace experimental @@ -57,6 +59,7 @@ public: virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Set a kernel tensor argument * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. @@ -66,9 +69,19 @@ private: * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued) */ inline void add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector &cl_images); +#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Set a kernel argument as part of a tensor + * + * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. + * @param[in] arg Kernel argument binding, as part of @p tensor + * @param[in] tensor Tensor of which the kernel argument @p arg is a part of + * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued) + */ + inline void add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector &cl_images); +#endif // ACL_INTERNAL_TEST_CKW_IN_DF private: - GpuKernelArgumentList _arguments{}; /** All kernel arguments required by the runtime */ + GpuKernelArgumentList _arguments{}; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp index b273c2a20c..84fb279237 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp +++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp @@ -26,7 +26,11 @@ namespace arm_compute { -void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponentType component) +namespace experimental +{ +namespace dynamic_fusion +{ +void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component) { ARM_COMPUTE_ERROR_ON(tensor == nullptr); @@ -35,49 +39,49 @@ void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICL switch(component) { - case ckw::TensorComponentType::OffsetFirstElement: + case TensorComponentType::OffsetFirstElement: kernel.setArg(idx++, info->offset_first_element_in_bytes()); break; - case ckw::TensorComponentType::Stride0: + case TensorComponentType::Stride0: kernel.setArg(idx++, strides[0]); break; - case ckw::TensorComponentType::Stride1: + case TensorComponentType::Stride1: kernel.setArg(idx++, strides[1]); break; - case ckw::TensorComponentType::Stride2: + case TensorComponentType::Stride2: kernel.setArg(idx++, strides[2]); break; - case ckw::TensorComponentType::Stride3: + case TensorComponentType::Stride3: kernel.setArg(idx++, strides[3]); break; - case ckw::TensorComponentType::Stride4: + case TensorComponentType::Stride4: kernel.setArg(idx++, strides[4]); break; - case ckw::TensorComponentType::Dim0: + case TensorComponentType::Dim0: kernel.setArg(idx++, info->dimension(0)); break; - case ckw::TensorComponentType::Dim1: + case TensorComponentType::Dim1: kernel.setArg(idx++, info->dimension(1)); break; - case ckw::TensorComponentType::Dim2: + case TensorComponentType::Dim2: kernel.setArg(idx++, info->dimension(2)); break; - case ckw::TensorComponentType::Dim3: + case TensorComponentType::Dim3: kernel.setArg(idx++, info->dimension(3)); break; - case ckw::TensorComponentType::Dim4: + case TensorComponentType::Dim4: kernel.setArg(idx++, info->dimension(4)); break; - case ckw::TensorComponentType::Dim1xDim2: + case TensorComponentType::Dim1xDim2: kernel.setArg(idx++, info->dimension(1) * info->dimension(2)); break; - case ckw::TensorComponentType::Dim2xDim3: + case TensorComponentType::Dim2xDim3: kernel.setArg(idx++, info->dimension(2) * info->dimension(3)); break; - case ckw::TensorComponentType::Dim1xDim2xDim3: + case TensorComponentType::Dim1xDim2xDim3: kernel.setArg(idx++, info->dimension(1) * info->dimension(2) * info->dimension(3)); break; - case ckw::TensorComponentType::Unknown: + case TensorComponentType::Unknown: default: ARM_COMPUTE_ERROR("Unknown tensor component"); } @@ -93,4 +97,6 @@ void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Im kernel.setArg(idx++, image); } +} // namespace dynamic_fusion +} // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h index 3c785732a5..4cbb157a48 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h +++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h @@ -27,10 +27,14 @@ #include "arm_compute/core/CL/ICLTensor.h" -#include "ckw/TensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" namespace arm_compute { +namespace experimental +{ +namespace dynamic_fusion +{ /** Select a Compute Kernel Writer tensor component from a tensor and add to the kernel's arguments at the specified index idx. * * @param[in,out] kernel OpenCL kernel to configure with the provided argument. @@ -38,7 +42,7 @@ namespace arm_compute * @param[in] tensor Tensor from which to access the tensor component. * @param[in] component Tensor component to select such as tensor dimensions, strides, etc. */ -void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponentType component); +void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component); /** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx. * @@ -56,6 +60,8 @@ void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buf */ void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Image &image); +} // namespace dynamic_fusion +} // namespace experimental } // namespace arm_compute #endif /* ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CKW_DRIVER_GPUCKWKERNELARGUMENTSHELPERS */ diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h index 302d4c8562..226e1a2df3 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h @@ -33,6 +33,7 @@ namespace experimental namespace dynamic_fusion { /** Contain information required to set up a kernel argument at run time + * @deprecated To be removed along with ClTemplateWriter */ struct GpuKernelArgumentInfo { @@ -66,10 +67,9 @@ struct GpuKernelArgumentInfo } Type type{ Type::Tensor_4D_t_Buffer }; }; - bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1); - /** Kernel argument information linked with its corresponding @ref ITensorInfo + * @deprecated To be removed along with ClTemplateWriter */ class GpuKernelArgument { @@ -124,6 +124,130 @@ private: TensorInfo _tensor_info{}; GpuKernelArgumentInfo _kernel_arg_info{}; }; +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF +/** Describe how the tensor runtime memory can be accessed + * + * Please see documentation under @ref GpuKernelArgumentBinding + */ +enum class TensorStorageType +{ + Unknown, + ClBufferUint8Ptr, + ClImage2dReadOnly, + ClImage2dWriteOnly, +}; + +/** Describe additional runtime information about the tensor + * + * Please see documentation under @ref GpuKernelArgumentBinding + */ +enum class TensorComponentType +{ + Unknown, + OffsetFirstElement, + Stride0, + Stride1, + Stride2, + Stride3, + Stride4, + Dim0, + Dim1, + Dim2, + Dim3, + Dim4, + Dim1xDim2, + Dim2xDim3, + Dim1xDim2xDim3, +}; + +/** Describe how to extract information from a runtime Gpu tensor, and set it as an argument to a gpu kernel at runtime + * + * A kernel argument is just an argument to the gpu kernel as shown in the argument list below. This contrasts with a "workload argument" which is a tensor (@ref GpuWorkloadArgument) + * void kernel(arg0, arg1, ... argN) + * + * In a kernel generated using dynamic fusion (@ref GpuKernelSourceCode), every kernel argument describes part of a tensor. + * A tensor is described as: **storages** followed by **components** + * + * A storage (@ref TensorStorageType) describes how the tensor runtime memory can be accessed (e.g. via a global uint8 pointer to a CL buffer) + * A component (@ref TensorComponentType) describes additional runtime information about the tensor (e.g. the dimensions of the tensor) + * + * The arguments are arranged in the order of use in the generated kernel code: + * + * arg0 , arg1 , arg2 , ..., , argN + * storage, component0, component1, ..., componentX, storage, component0, component1, ..., componentY + * | tensor0 | tensor1 | + * + * An example argument list: + * + * void kernel( + * image2d_t t0_image, // TensorStorageType::ClImage2dReadOnly + * uint8_t* t0_ptr, // TensorStorageType::ClBufferUint8Ptr + * uint t0_dim0, // TensorComponentType::Dim0 + * uint t0_stride1, // TensorComponentType::Stride1 + * image2d_t t1_ptr, // TensorStorageType::ClImage2dReadOnly + * uint t1_dim1xdim2, // TensorComponentType::Dim1xDim2 + * uint t1_stride1, // TensorComponentType::Stride1 + * uint t1_stride2, // TensorComponentType:Stride2 + * ) + * + */ +class GpuKernelArgumentBinding +{ +public: + enum class Type : int32_t + { + TensorStorage, /** @ref TensorStorageType */ + TensorComponent /** @ref TensorComponentType */ + }; + GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage) + : _type{ Type::TensorStorage }, _id{ id }, _value{} + { + _value.tensor_storage_type = storage; + } + GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component) + : _type{ Type::TensorComponent }, _id{ id }, _value{} + { + _value.tensor_component_type = component; + } + /** Storage type of the tensor + */ + TensorStorageType tensor_storage_type() const + { + ARM_COMPUTE_ERROR_ON(_type != Type::TensorStorage); + return _value.tensor_storage_type; + } + /** Component of the tensor + */ + TensorComponentType tensor_component_type() const + { + ARM_COMPUTE_ERROR_ON(_type != Type::TensorComponent); + return _value.tensor_component_type; + } + /** Id of the tensor this kernel argument belongs to + */ + ITensorInfo::Id id() const + { + return _id; + } + /** Type of the kernel argument + */ + Type type() const + { + return _type; + } + +private: + Type _type; + ITensorInfo::Id _id; + union Value + { + TensorStorageType tensor_storage_type; + TensorComponentType tensor_component_type; + }; + Value _value; +}; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF + } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp index b70a192775..5a65ede38b 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp @@ -44,14 +44,14 @@ std::vector GpuKernelComponentGraph::get_tensor_ids(c return tensor_ids; } -GpuKernelComponentGraph::GpuKernelComponentGraph(GpuComponentServices *services) - : _services{ services }, _components{}, _tensors{}, _dependency_graph{} +GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services) + : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{} { } GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const { - GpuKernelComponentStream stream{ _services, mem_map }; + GpuKernelComponentStream stream{ _context, _services, mem_map }; const auto op_seq = _dependency_graph.build_operators_sequence(); stream.new_component_group(); diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h index 8314ea0a50..85c9b45840 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" @@ -49,9 +49,10 @@ class GpuKernelComponentGraph public: /** Constructor * + * @param[in] context @ref GpuWorkloadContext to be used by the graph * @param[in] services @ref GpuComponentServices to be used by the graph */ - GpuKernelComponentGraph(GpuComponentServices *services); + GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services); /** Prevent instances of this class from being copy constructed */ GpuKernelComponentGraph(const GpuKernelComponentGraph &graph) = delete; /** Prevent instances of this class from being copied */ @@ -98,6 +99,7 @@ public: private: static std::vector get_tensor_ids(const std::vector tensors); + GpuWorkloadContext *_context; GpuComponentServices *_services; std::map> _components; std::map _tensors; @@ -106,4 +108,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGRAPH */ diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp index 8f4eadc477..a2b6623370 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -33,8 +33,8 @@ namespace experimental { namespace dynamic_fusion { -GpuKernelComponentStream::GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map) - : _services{ services }, _component_groups{}, _mem_map{ mem_map } +GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map) + : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map } { } @@ -51,7 +51,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code() const GpuKernelSourceCode kernel_code = logical_kernel.write_kernel_code(); // The whole unit workload stage is determined by the root component const auto unit_workload_stage = group.get_root_component()->properties().stage(); - source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map); + source_code.add_unit_workload(kernel_code, unit_workload_stage, _mem_map, _context); } return source_code; } diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h index cbaa7c297b..ba2503a938 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,10 +49,11 @@ class GpuKernelComponentStream public: /** Constructor * + * @param[in] context @ref GpuWorkloadContext to be used throughout the stream * @param[in] services @ref GpuComponentServices to be used throughout the stream * @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode */ - GpuKernelComponentStream(GpuComponentServices *services, const MemoryDescriptorMap &mem_map); + GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map); /** Allow instances of this class to be copy constructed */ GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default; /** Allow instances of this class to be copied */ @@ -78,6 +79,7 @@ public: bool add_component(IGpuKernelComponent *component); private: + GpuWorkloadContext *_context; GpuComponentServices *_services; std::vector _component_groups{}; MemoryDescriptorMap _mem_map{}; diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h index 7479328d7b..64e1cdc3bc 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,18 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include +#else // ACL_INTERNAL_TEST_CKW_IN_DF +#include +#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include namespace arm_compute @@ -38,7 +42,11 @@ namespace experimental namespace dynamic_fusion { /** The argument list of a @ref GpuKernelSourceCode */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF using GpuKernelArgumentList = std::map; +#else // ACL_INTERNAL_TEST_CKW_IN_DF +using GpuKernelArgumentList = std::deque; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Container of kernel code to be compiled and run in a @ref GpuUnitWorkload */ @@ -123,4 +131,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp index 00f625de28..c99984fc0e 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -29,7 +29,11 @@ #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h" +#else // ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" +#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -46,11 +50,19 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() { GpuKernelSourceCode code; - ClTemplateWriter writer{ _comp_group }; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + ClTemplateWriter writer { _comp_group }; +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuCkwDriver writer { _comp_group }; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.name(writer.get_name()); code.code(writer.get_code()); +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF code.arguments(writer.get_tensors()); +#else // ACL_INTERNAL_TEST_CKW_IN_DF + code.arguments(writer.get_kernel_arguments()); +#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.build_options(writer.get_build_options()); code.config_id(writer.get_config_id()); code.window(writer.get_window()); diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp index 50f34d9c14..c2bd012703 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp @@ -32,9 +32,8 @@ namespace experimental { namespace dynamic_fusion { - GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx) - : _impl { std::make_unique(GpuLanguage::OpenCL, cl_compile_ctx) } + : _impl{ std::make_unique(GpuLanguage::OpenCL, cl_compile_ctx) } { } @@ -75,8 +74,7 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const } GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx) - : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), - _next_tensor_id(1), _mem_map() + : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info() { } @@ -103,26 +101,39 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info) tensor_info.set_id(tensor_id); _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User }; + // Save a *copy* of the user tensor info in workload context for future reference + // Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context + _managed_tensor_info.emplace(tensor_info.id(), std::make_unique(tensor_info)); } -void GpuWorkloadContext::Impl::register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info) +ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor() { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); - - const auto tensor_id = next_tensor_id(); - - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, mem_info }; + auto tensor_info = std::make_unique(); + const auto tensor_id = -next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); } -void GpuWorkloadContext::Impl::register_virtual_tensor(ITensorInfo &tensor_info) +ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo &itensor_info) { - ARM_COMPUTE_ERROR_ON(tensor_info.has_valid_id()); + auto tensor_info = std::make_unique(itensor_info); + const auto tensor_id = next_tensor_id(); + tensor_info->set_id(tensor_id); + _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } }; + auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info)); + return inserted.first->second.get(); +} - const auto tensor_id = -next_tensor_id(); +ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) +{ + return _managed_tensor_info.at(id).get(); +} - tensor_info.set_id(tensor_id); - _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual }; +const ITensorInfo *GpuWorkloadContext::Impl::get_tensor_info(ITensorInfo::Id id) const +{ + return _managed_tensor_info.at(id).get(); } ITensorInfo::Id GpuWorkloadContext::Impl::next_tensor_id() diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h index a857932791..c169476a70 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h @@ -36,7 +36,6 @@ namespace experimental { namespace dynamic_fusion { - /** Internal implementation of workload context. */ class GpuWorkloadContext::Impl { @@ -52,7 +51,7 @@ public: Impl(Impl &) = default; /** Assignment */ - Impl& operator=(Impl &) = default; + Impl &operator=(Impl &) = default; /** Get target GPU language. */ GpuLanguage gpu_language() const; @@ -69,27 +68,34 @@ public: */ void register_user_tensor(ITensorInfo &tensor_info); - /** Set a new ID and register the auxiliary tensor info. + /** Create a virtual (see @ref MemoryType) tensor info and save it * - * @param[in, out] tensor_info The tensor info to be registered. - * @param[in] mem_info The auxiliary tensor memory info. + * @return ITensorInfo* The created virtual tensor info object pointer */ - void register_aux_tensor(ITensorInfo &tensor_info, const AuxMemoryInfo &mem_info); - - /** Set a new ID and register the virtual tensor info. + ITensorInfo *create_virtual_tensor(); + /** Create an auxiliary (see @ref MemoryType) tensor info and save it * - * @param[in, out] tensor_info The tensor info to be registered. + * @param[in] tensor_info @ref ITensorInfo to copy from + * + * @return ITensorInfo* The created auxiliary tensor info object pointer */ - void register_virtual_tensor(ITensorInfo &tensor_info); + ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info); + + /** Get tensor info created by this context, from id */ + ITensorInfo *get_tensor_info(ITensorInfo::Id id); + + /** Get tensor info created by this context, from id */ + const ITensorInfo *get_tensor_info(ITensorInfo::Id id) const; private: ITensorInfo::Id next_tensor_id(); - GpuLanguage _gpu_language; + GpuLanguage _gpu_language; CLCompileContext *_cl_compile_ctx; - ITensorInfo::Id _next_tensor_id; + ITensorInfo::Id _next_tensor_id; MemoryDescriptorMap _mem_map; + std::map> _managed_tensor_info; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h index 44c99e844b..d3033898e9 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h @@ -26,13 +26,10 @@ #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h" #include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h" - -#include -#include +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" namespace arm_compute { @@ -52,9 +49,8 @@ public: Context *context) : _context{ context }, _comp_services{}, - _component_graph{ &_comp_services }, - _operator_group{}, - _managed_tensor_info_list{ std::vector>() } + _component_graph{ _context, &_comp_services }, + _operator_group{} { } /** Prevent instances of this class from being copy constructed */ @@ -90,10 +86,6 @@ public: { return _operator_group; } - ITensorInfo::Id allocate_new_tensor_id() - { - return ++_next_id; - } /** Generate @ref GpuWorkloadSourceCode from the workload sketch * @note The sketch must be valid. Any error encountered during the building of the code will be thrown. * @@ -110,16 +102,8 @@ public: */ ITensorInfo *create_virtual_tensor() { - auto uptr = std::make_unique(); - _context->implementation().register_virtual_tensor(*uptr); - _managed_tensor_info_list.emplace_back(std::move(uptr)); - return _managed_tensor_info_list.back().get(); + return _context->implementation().create_virtual_tensor(); } - /** Create an auxiliary (see @ref MemoryType) tensor info and save it - * - * @return ITensorInfo* The created auxiliary tensor info object pointer - */ - /** Create an auxiliary (see @ref MemoryType) tensor info and save it * * @param[in] tensor_info @ref ITensorInfo to copy from @@ -128,19 +112,19 @@ public: */ ITensorInfo *create_auxiliary_tensor(const ITensorInfo &tensor_info) { - auto uptr = std::make_unique(tensor_info); - _context->implementation().register_aux_tensor(*uptr, AuxMemoryInfo{ uptr->total_size() }); - _managed_tensor_info_list.emplace_back(std::move(uptr)); - return _managed_tensor_info_list.back().get(); + return _context->implementation().create_auxiliary_tensor(tensor_info); + } + + ITensorInfo *get_tensor_info(ITensorInfo::Id id) + { + return _context->implementation().get_tensor_info(id); } private: - Context *_context; - GpuComponentServices _comp_services; - GpuKernelComponentGraph _component_graph; - GpuOperatorGroup _operator_group; - ITensorInfo::Id _next_id{ ITensorInfo::invalid_tensor_id }; - std::vector> _managed_tensor_info_list; + Context *_context; + GpuComponentServices _comp_services; + GpuKernelComponentGraph _component_graph; + GpuOperatorGroup _operator_group; }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h index d1d0bdf77f..578366daaf 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h @@ -27,6 +27,7 @@ #include "arm_compute/core/experimental/Types.h" #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" +#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h" namespace arm_compute { @@ -34,10 +35,45 @@ namespace experimental { namespace dynamic_fusion { +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF +namespace +{ +/** Extract kernel arguments of one tensor from a flat list of kernel arguments. + * + * @param[in] flat_kernel_args + * @return GpuKernelArgumentList + */ +GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args) +{ + if(flat_kernel_args.empty()) + { + return {}; + } + GpuKernelArgumentList tensor_kargs{}; + + const GpuKernelArgumentBinding &karg_head = flat_kernel_args.front(); + tensor_kargs.push_back(karg_head); + flat_kernel_args.pop_front(); + const auto tensor_id = karg_head.id(); + + while(!flat_kernel_args.empty()) + { + const GpuKernelArgumentBinding &karg = flat_kernel_args.front(); + if(karg.id() != tensor_id) // Encounter the next tensor, return the current tensor's kernel arguments + { + return tensor_kargs; + } + tensor_kargs.push_back(karg); + flat_kernel_args.pop_front(); + } + return tensor_kargs; +} +} +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */ using UnitWorkloadId = int32_t; -/** Describes all the info related to a kernel in order to: +/** Describes all the info related to a **workload argument** (tensor) in order to: * - be used by runtime to configure gpu kernel argument * - be used by memory managers to allocate required memory */ @@ -46,6 +82,7 @@ class GpuWorkloadArgument public: /** Default constructor */ GpuWorkloadArgument() = default; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Constructor * * @param[in] tensor_info @ref ITensorInfo of the workload argument @@ -60,6 +97,22 @@ public: _kernel_arg_info{ kernel_arg_info } { } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Constructor + * + * @param[in] tensor_info @ref ITensorInfo of the workload argument + * @param[in] mem_desc @ref MemoryDescriptor of the workload argument + * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument + */ + GpuWorkloadArgument(const ITensorInfo &tensor_info, + const MemoryDescriptor &mem_desc, + const GpuKernelArgumentList &kernel_args) + : _tensor_info{ tensor_info }, + _mem_desc{ mem_desc }, + _kernel_args{ kernel_args } + { + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get tensor id within workload */ ITensorInfo::Id id() const { @@ -85,6 +138,7 @@ public: { return &_mem_desc; } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Get @ref GpuKernelArgumentInfo of the argument */ GpuKernelArgumentInfo *kernel_argument_info() { @@ -95,6 +149,18 @@ public: { return &_kernel_arg_info; } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Get @ref GpuKernelArgumentList of the workload tensor */ + GpuKernelArgumentList *kernel_argument_list() + { + return &_kernel_args; + } + /** Get @ref GpuKernelArgumentList of the workload tensor */ + const GpuKernelArgumentList *kernel_argument_list() const + { + return &_kernel_args; + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Check if the workload argument has valid id * * @return true If has valid id @@ -106,9 +172,13 @@ public: } private: - TensorInfo _tensor_info{}; - MemoryDescriptor _mem_desc{}; - GpuKernelArgumentInfo _kernel_arg_info{}; + TensorInfo _tensor_info{}; + MemoryDescriptor _mem_desc{}; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentInfo _kernel_arg_info {}; +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList _kernel_args {}; +#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; /** Describes when a unit workload is run. @@ -179,15 +249,18 @@ public: * @param[in] kernel_code @ref GpuKernelSourceCode to be contained within the unit workload * @param[in] stage Stage of the unit workload * @param[in] mem_map @ref MemoryDescriptor map for all tensors within the unit workload + * @param[in] context @ref GpuWorkloadContext associated with the unit workload * * @return UnitWorkloadId Allocated unit workload id */ - UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map) + UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context) { // Use the size of the kernel codes as Id const auto uwk_id = static_cast(_unit_workloads.size()); const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage); _unit_workloads.push_back(unit_work); +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + ARM_COMPUTE_UNUSED(context); // Assemble kernel argument with memory descriptor to form workload argument for(const auto &id_arg : kernel_code.arguments()) { @@ -200,6 +273,28 @@ public: } _tensor_uwork_map[arg_id].insert(uwk_id); } +#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList flat_kernel_args = kernel_code.arguments(); + GpuKernelArgumentList tensor_kargs{}; + while(true) + { + tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args); + if(tensor_kargs.empty()) + { + break; + } + else + { + const auto tensor_id = tensor_kargs.at(0).id(); + _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs }; + if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end()) + { + _tensor_uwork_map[tensor_id] = std::set(); + } + _tensor_uwork_map[tensor_id].insert(uwk_id); + } + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF return uwk_id; } /** Get a unit workload from its id */ diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h index ae67790b4b..28e5432224 100644 --- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h +++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,9 +27,11 @@ #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include #include +#include namespace arm_compute { @@ -56,8 +58,20 @@ public: virtual std::string get_config_id() = 0; /** Generate execution window */ virtual Window get_window() const = 0; - /** Get the kernel argument lists of the kernel*/ - virtual std::map get_tensors() = 0; + /** Get the kernel argument lists of the kernel + * @deprecated To be removed along with ClTemplateWriter + */ + virtual std::map get_tensors() + { + return {}; + } +#ifdef ACL_INTERNAL_TEST_CKW_IN_DF + /** Get the flat list of arguments of the kernel*/ + virtual GpuKernelArgumentList get_kernel_arguments() + { + return {}; + } +#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp index d5c03c60c5..d78956f835 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp @@ -30,6 +30,7 @@ #include "arm_compute/core/Window.h" #include "src/common/utils/Log.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" @@ -42,29 +43,24 @@ namespace experimental namespace dynamic_fusion { GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components) - : _components{ components } + : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL } { } std::string GpuCkwDriver::get_name() { ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - return "todo_get_name"; + return "unnamed"; } std::string GpuCkwDriver::get_code() { - ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL); - GpuCkwKernelWriter root_writer(kernel); + _kernel.name(get_name()); + GpuCkwKernelWriter root_writer(_kernel); GpuCkwScopedKernelWriter writer(&root_writer); GpuCkwVariableTable vtable{}; // Global Kernel Writer Driver code - - // The following is just an incomplete example of using the kernel writer - - // Iterate over component specific Ckw Driver; generate component code and concatenate them for(auto &comp : _components) { auto ckw_driver = comp->ckw_component_driver(); @@ -96,18 +92,31 @@ Window GpuCkwDriver::get_window() const return root_comp->ckw_component_driver()->get_window(); } -std::map GpuCkwDriver::get_tensors() +GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments() { - ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - // Assemble GpuKernelArguments - std::map tensors; - for(const auto t : _components.get_argument_tensors()) + GpuKernelArgumentList args{}; + for(const auto &arg : _kernel.arguments()) { - tensors.emplace( - t->id(), - GpuKernelArgument{ *t, { GpuKernelArgumentInfo::Type::Tensor_Special_0 } }); + switch(arg.type()) + { + case KernelArgument::Type::TensorStorage: + { + args.emplace_back(static_cast(arg.id()), from_ckw(arg.tensor_storage_type())); + break; + } + case KernelArgument::Type::TensorComponent: + { + args.emplace_back(static_cast(arg.id()), from_ckw(arg.tensor_component_type())); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported KernelArgument Type"); + break; + } + } } - return tensors; + return args; } } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h index 2084b72098..c6e03f6e03 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h @@ -28,6 +28,8 @@ #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" +#include "ckw/Kernel.h" + #include #include @@ -66,11 +68,12 @@ public: std::string get_config_id() override; /** Generate execution window */ Window get_window() const override; - /** Get the kernel argument lists of the kernel*/ - std::map get_tensors() override; + /** Get the flat list of arguments of the kernel*/ + GpuKernelArgumentList get_kernel_arguments() override; private: GpuKernelComponentGroup _components{}; + ckw::Kernel _kernel; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp index 154968775c..6f3eca711d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp @@ -23,9 +23,10 @@ */ #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include @@ -35,7 +36,8 @@ namespace experimental { namespace dynamic_fusion { -GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias) +GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage, + const std::string &alias) { ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); @@ -59,7 +61,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo std::stringstream ss; ss << alias << "_t" << abs(tensor->id()); const auto uniq_name = ss.str(); - GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) }; + GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) }; auto &&inserted = _vars.emplace(tensor->id(), var); return &(inserted.first->second); } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h index 1c9cb083ea..0649dcba9d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h @@ -24,8 +24,8 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" #include "arm_compute/core/ITensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" #include @@ -37,6 +37,7 @@ namespace dynamic_fusion { class GpuKernelComponentGroup; class GpuCkwScopedKernelWriter; +enum class TensorStorageType; /** A table of all the variables used in the kernel. * @@ -52,11 +53,13 @@ public: * @param[in] comp_group Component group the tensor belongs to * @param[in] writer Compute Kernel Writer * @param[in] tensor Tensor info with which the new variable is associated + * @param[in] storage Tensor storage type associated with the tensor * @param[in] alias Alias for the variable. Will be used as part of the variable name * * @return GpuCkwComponentArgument* */ - GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed"); + GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage, + const std::string &alias = "unnamed"); private: std::map _vars{}; diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp index 224c176a31..c07fac0e0d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwActivation.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwActivation::GpuCkwActivation(ComponentId id, - const ArgumentPack &tensors, - const Attributes &attributes) + const ArgumentPack &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler); @@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp auto &dst_tile = dst->tile(); // Constants - const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); - const auto &constant_pos_1 = writer->declare_tile("one", 1); - const auto &constant_zero = writer->declare_tile("zero", 0); - const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); - const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); + const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); + const auto &constant_pos_1 = writer->declare_tile("one", 1); + const auto &constant_zero = writer->declare_tile("zero", 0); + const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); + const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); // Perform the operation. - switch (_attributes.activation()) + switch(_attributes.activation()) { case ActivationLayerInfo::ActivationFunction::LOGISTIC: { @@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp index dd71c55df2..8d7e6a8c37 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwCast.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwCast::GpuCkwCast(ComponentId id, - const ArgumentPack &tensors, - const Attributes &attributes) + const ArgumentPack &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the source tile and prepare the sampler. if(!src->has_tile()) @@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa if(!dst->has_tile()) { // Get Target datatype and convert it to ckw::DataType. - ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); + ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); // Create dst_tile based on src_tile dimensions and with target DataType. const TileInfo src_tile_info = src_tile.tile_info(); @@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index 685bf391dc..15e32e26d5 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -23,14 +23,16 @@ */ #include "GpuCkwElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include @@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer, auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); - auto &const_0 = writer->declare_tile("0", 0); - writer->op_get_global_id(gid_0, 0); writer->op_get_global_id(gid_1, 1); writer->op_get_global_id(gid_2, 2); - sampler.x(gid_0); - sampler.y(gid_1); + auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32); + auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32); + auto &m0_t = writer->declare_tile("m0", m0); + auto &n0_t = writer->declare_tile("n0", n0); + writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t); + writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t); + + sampler.x(x_coord); + sampler.y(y_coord); + auto &const_0 = writer->declare_tile("0", 0); sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension sampler.b(gid_2); @@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); - GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); + GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the LHS and RHS tiles and prepare the tensor sampler. load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); @@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - // constexpr unsigned int vector_size_byte_opencl = 16; - // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp index 63555e6064..247d1b834f 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp @@ -24,6 +24,7 @@ #include "GpuCkwStore.h" #include "arm_compute/core/Error.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" @@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack &tensor } void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { - auto src = vtable.declare_variable(comp_group, writer, _src, "src"); - auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); auto &src_tile = src->tile(); const auto &sampler = src->tile_sampler(); diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h index 9027bddd76..8a38d67d80 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "ckw/TensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" namespace arm_compute { @@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info) tensor_info.id() }; } + +inline TensorComponentType from_ckw(const ckw::TensorComponentType &component) +{ + switch(component) + { + case ckw::TensorComponentType::OffsetFirstElement: + return TensorComponentType::OffsetFirstElement; + break; + case ckw::TensorComponentType::Stride0: + return TensorComponentType::Stride0; + break; + case ckw::TensorComponentType::Stride1: + return TensorComponentType::Stride1; + break; + case ckw::TensorComponentType::Stride2: + return TensorComponentType::Stride2; + break; + case ckw::TensorComponentType::Stride3: + return TensorComponentType::Stride3; + break; + case ckw::TensorComponentType::Stride4: + return TensorComponentType::Stride4; + break; + case ckw::TensorComponentType::Dim0: + return TensorComponentType::Dim0; + break; + case ckw::TensorComponentType::Dim1: + return TensorComponentType::Dim1; + break; + case ckw::TensorComponentType::Dim2: + return TensorComponentType::Dim2; + break; + case ckw::TensorComponentType::Dim3: + return TensorComponentType::Dim3; + break; + case ckw::TensorComponentType::Dim4: + return TensorComponentType::Dim4; + break; + case ckw::TensorComponentType::Dim1xDim2: + return TensorComponentType::Dim1xDim2; + break; + case ckw::TensorComponentType::Dim2xDim3: + return TensorComponentType::Dim2xDim3; + break; + case ckw::TensorComponentType::Dim1xDim2xDim3: + return TensorComponentType::Dim1xDim2xDim3; + break; + case ckw::TensorComponentType::Unknown: + return TensorComponentType::Unknown; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor component"); + return TensorComponentType::Unknown; + } +} + +inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage) +{ + switch(storage) + { + case TensorStorageType::ClBufferUint8Ptr: + return ckw::TensorStorageType::BufferUint8Ptr; + break; + case TensorStorageType::ClImage2dReadOnly: + return ckw::TensorStorageType::Texture2dReadOnly; + break; + case TensorStorageType::ClImage2dWriteOnly: + return ckw::TensorStorageType::Texture2dWriteOnly; + break; + case TensorStorageType::Unknown: + return ckw::TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown tensor storage type"); + return ckw::TensorStorageType::Unknown; + } +} +inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage) +{ + switch(storage) + { + case ckw::TensorStorageType::BufferUint8Ptr: + return TensorStorageType::ClBufferUint8Ptr; + break; + case ckw::TensorStorageType::Texture2dReadOnly: + return TensorStorageType::ClImage2dReadOnly; + break; + case ckw::TensorStorageType::Texture2dWriteOnly: + return TensorStorageType::ClImage2dWriteOnly; + break; + case ckw::TensorStorageType::Unknown: + return TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor storage type"); + return TensorStorageType::Unknown; + } +} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h index d600956b4f..af766a7ece 100644 --- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h +++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h @@ -104,9 +104,11 @@ public: { return _properties; } - /** Get template writer for the component */ - virtual const IGpuTemplateComponentWriter *template_writer() const = 0; - /** Get compute kernel writer driver for the component */ + /** Get writer for the component */ + virtual const IGpuTemplateComponentWriter *template_writer() const + { + return nullptr; + } virtual const IGpuCkwComponentDriver *ckw_component_driver() const { return nullptr; diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp index d2cde40a10..c41257d18c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp @@ -24,8 +24,11 @@ #include "ClComponentActivation.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -66,8 +69,17 @@ ClComponentActivation::ClComponentActivation(ComponentId const ArgumentPack &tensors, const Attributes &attributes) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -75,15 +87,15 @@ ClComponentActivation::~ClComponentActivation() { } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } -const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const -{ - return _ckw_driver.get(); -} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h index bb6f7c6e30..ebe8719420 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h @@ -42,8 +42,11 @@ template class ArgumentPack; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateActivation; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwActivation; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentActivation final : public IGpuKernelComponent { @@ -106,10 +109,12 @@ public: /** Allow instances of this class to be moved */ ClComponentActivation &operator=(ClComponentActivation &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - - const IGpuCkwComponentDriver *ckw_component_driver() const override; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -118,8 +123,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp index 92933ae7a5..635869f817 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp @@ -26,8 +26,11 @@ #include "arm_compute/core/Error.h" #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -67,23 +70,32 @@ ClComponentCast::ClComponentCast(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer +{ + std::make_unique(id, tensors, attributes) +} +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } ClComponentCast::~ClComponentCast() { } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } -const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const -{ - return _ckw_driver.get(); -} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h index 174f9670b3..37b8cbb6c9 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h @@ -48,8 +48,11 @@ private: }; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateCast; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwCast; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentCast final : public IGpuKernelComponent { @@ -116,10 +119,12 @@ public: ClComponentCast(ClComponentCast &&component) = default; /** Allow instances of this class to be moved */ ClComponentCast &operator=(ClComponentCast &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - /** Get GPU kernel writer for the component */ +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -127,8 +132,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 52739e23c0..88d729170c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -25,8 +25,11 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -106,28 +109,38 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack &t return Status{}; } +ClComponentElementwiseBinary::~ClComponentElementwiseBinary() +{ +} ClComponentElementwiseBinary::ClComponentElementwiseBinary( ComponentId id, const Properties &properties, const ArgumentPack &tensors, const Attributes &attributes) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) }, - _ckw_driver{ std::make_unique(id, tensors, attributes) } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors, attributes) } -ClComponentElementwiseBinary::~ClComponentElementwiseBinary() +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors, attributes) } -const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _component_writer.get(); } +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _ckw_driver.get(); + return _component_writer.get(); } + } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h index a56dd8b37d..f7175903d0 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -40,8 +40,11 @@ template class ArgumentPack; /** Forward declaration */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF class ClTemplateElementwiseBinary; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwElementwiseBinary; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentElementwiseBinary final : public IGpuKernelComponent { @@ -101,10 +104,13 @@ public: ClComponentElementwiseBinary(ClComponentElementwiseBinary &&component) = default; /** Allow instances of this class to be moved */ ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -112,10 +118,13 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp index a3283b1866..12b81c3d56 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp @@ -24,8 +24,11 @@ #include "ClComponentStore.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" +#else //ACL_INTERNAL_TEST_CKW_IN_DF +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" +#endif //ACL_INTERNAL_TEST_CKW_IN_DF #include @@ -43,19 +46,30 @@ Status ClComponentStore::validate( return Status{}; } ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack &tensors) - : IGpuKernelComponent{ id, properties, tensors }, _component_writer{ std::make_unique(id, tensors) }, _ckw_driver{ std::make_unique(id, tensors) } + : IGpuKernelComponent{ id, properties, tensors }, +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors) } -ClComponentStore::~ClComponentStore() +#else //ACL_INTERNAL_TEST_CKW_IN_DF + _component_writer { + std::make_unique(id, tensors) } -const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _component_writer.get(); } +ClComponentStore::~ClComponentStore() +{ +} +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const +#endif //ACL_INTERNAL_TEST_CKW_IN_DF { - return _ckw_driver.get(); + return _component_writer.get(); } } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h index f168ccb97e..853ee39012 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h @@ -25,7 +25,6 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" #include namespace arm_compute @@ -39,7 +38,11 @@ namespace dynamic_fusion /** Forward declaration */ template class ArgumentPack; +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF +class ClTemplateStore; +#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwStore; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentStore final : public IGpuKernelComponent { @@ -85,10 +88,12 @@ public: ClComponentStore(ClComponentStore &&component) = default; /** Allow instances of this class to be moved */ ClComponentStore &operator=(ClComponentStore &&component) = default; - /** Get template writer for the component */ + /** Get writer for the component */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF const IGpuTemplateComponentWriter *template_writer() const override; - +#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -96,8 +101,11 @@ public: } private: +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr _component_writer; - std::unique_ptr _ckw_driver; +#else //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr _component_writer; +#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp index 7ab7c8a3fc..71b0114225 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h" #include "tests/CL/CLAccessor.h" @@ -432,3 +433,5 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp index f27a1796c9..5ab1fafe2f 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" @@ -249,3 +250,5 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp index f4478db42b..7f5efd662a 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" #include "tests/CL/CLAccessor.h" @@ -132,7 +133,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionGpuPool2dFixture, framework validate(CLAccessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunSpecial, DFSpecialGpuPool2dFixture, framework::DatasetMode::ALL, combine(datasets::PoolingLayerDatasetSpecialDynamicFusion(), - framework::dataset::make("DataType", DataType::F32))) + framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -231,3 +232,5 @@ TEST_SUITE_END() // CL } } } + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp index bdaa1be531..4d038b2780 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "tests/CL/CLAccessor.h" #include "tests/datasets/ReshapeLayerDataset.h" #include "tests/framework/Macros.h" @@ -121,3 +122,5 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp index 5f99cd6d78..9ca1c5f0da 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h" #include "tests/CL/CLAccessor.h" @@ -517,3 +518,5 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp index e8314d700d..340f5dc2a3 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp @@ -21,6 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw #include "arm_compute/core/Types.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h" @@ -196,3 +197,5 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute + +#endif // ACL_INTERNAL_TEST_CKW_IN_DF -- cgit v1.2.1