From 8c49f16e5909a9bd5dc6e68638d2e2d8acc2fc66 Mon Sep 17 00:00:00 2001 From: Jakub Sujak Date: Fri, 16 Jun 2023 09:52:50 +0100 Subject: Add helpers to set CKW tensor components as OpenCL kernel arguments * Define ckw::TensorStorage. The tensor storage represents the type of tensor memory object. * Add helper functions for setting the CKW TensorComponent and TensorStorage as OpenCL kernel arguments. * Refactor CL Image2D method for simpler image object creation. Resolves: COMPMID-5784 Change-Id: I2d37d06783c1dc55f3b5692b44eb49b151f2401c Signed-off-by: Jakub Sujak Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9807 Tested-by: Arm Jenkins Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- Android.bp | 1 + SConscript | 5 ++ SConstruct | 7 +- compute_kernel_writer/CMakeLists.txt | 6 -- compute_kernel_writer/include/ckw/TensorInfo.h | 10 +++ filelist.json | 7 +- scripts/clang_tidy_rules.py | 3 +- src/core/CL/CLUtils.cpp | 40 +++++++++-- src/core/CL/CLUtils.h | 10 +++ src/core/CL/ckw/KernelArgumentsHelpers.cpp | 97 ++++++++++++++++++++++++++ src/core/CL/ckw/KernelArgumentsHelpers.h | 62 ++++++++++++++++ src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 27 ++----- 12 files changed, 241 insertions(+), 34 deletions(-) create mode 100644 src/core/CL/ckw/KernelArgumentsHelpers.cpp create mode 100644 src/core/CL/ckw/KernelArgumentsHelpers.h diff --git a/Android.bp b/Android.bp index 3f7bf03d07..cb41b1ac39 100644 --- a/Android.bp +++ b/Android.bp @@ -228,6 +228,7 @@ cc_library_static { "src/core/CL/ICLSimpleKernel.cpp", "src/core/CL/ICLTensor.cpp", "src/core/CL/OpenCL.cpp", + "src/core/CL/ckw/KernelArgumentsHelpers.cpp", "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp", "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp", "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp", diff --git a/SConscript b/SConscript index be062e1d85..63009c6a05 100644 --- a/SConscript +++ b/SConscript @@ -529,6 +529,11 @@ if 
env['fixed_format_kernels']: if env['experimental_dynamic_fusion']: lib_files += filelist['experimental']['dynamic_fusion'] +# Compute Kernel Writer integration files +if env['ckw']: + if env['opencl']: + lib_files += filelist['experimental']['ckw']['cl'] + # Logging files if env["logging"]: lib_files += filelist['logging'] diff --git a/SConstruct b/SConstruct index 03b0f918e2..419fa33558 100644 --- a/SConstruct +++ b/SConstruct @@ -126,7 +126,7 @@ vars.AddVariables(     ├── datasets     ├── fixtures     └── Neon\n""", "", PathVariable.PathAccept), - BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files", False), + BoolVariable("experimental_dynamic_fusion", "Build the experimental dynamic fusion files. This option also enables opencl=1 and ckw=1 on which it has a direct dependency.", False), BoolVariable("fixed_format_kernels", "Enable fixed format kernels for GEMM", False), BoolVariable("mapfile", "Generate a map file", False), ListVariable("custom_options", "Custom options that can be used to turn on/off features", "none", ["disable_mmla_fp"]), @@ -215,6 +215,11 @@ if env['os'] == 'bare_metal': print("ERROR: OpenMP and C++11 threads not supported in bare_metal. 
Use cppthreads=0 openmp=0") Exit(1) +if env['experimental_dynamic_fusion']: + # Dynamic Fusion on GPU has a direct dependency on OpenCL and Compute Kernel Writer + env['opencl'] = 1 + env['ckw'] = 1 + if env['opencl'] and env['embed_kernels'] and env['compress_kernels'] and env['os'] not in ['android']: print("Compressed kernels are supported only for android builds") Exit(1) diff --git a/compute_kernel_writer/CMakeLists.txt b/compute_kernel_writer/CMakeLists.txt index f203a18f56..93372de3db 100644 --- a/compute_kernel_writer/CMakeLists.txt +++ b/compute_kernel_writer/CMakeLists.txt @@ -120,12 +120,6 @@ target_include_directories(ckw PRIVATE ${CMAKE_CURRENT_LIST_DIR} ) -set_target_properties(ckw - PROPERTIES - SOVERSION ${CMAKE_PROJECT_VERSION_MAJOR} - VERSION ${CMAKE_PROJECT_VERSION} - ) - #--------------------------------------------------------------------- # Validation tests diff --git a/compute_kernel_writer/include/ckw/TensorInfo.h b/compute_kernel_writer/include/ckw/TensorInfo.h index b5f76cffa5..44846bc94c 100644 --- a/compute_kernel_writer/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/include/ckw/TensorInfo.h @@ -86,6 +86,16 @@ enum class TensorComponent : uint32_t Dim1xDim2xDim3 = 0x08001110 }; +/** Compute Kernel Writer tensor storage. The tensor storage represents the type of tensor memory object. 
+ */ +enum class TensorStorage : uint32_t +{ + Unknown = 0x00000000, + BufferUint8Ptr = 0x01000000, + Texture2dReadOnly = 0x02000001, + Texture2dWriteOnly = 0x02000010, +}; + /** Compute Kernel Writer tensor shape * Negative dimensions can be interpreted as dynamic dimensions by the Compute Kernel Writer */ diff --git a/filelist.json b/filelist.json index f354e69398..0e18b37f5c 100644 --- a/filelist.json +++ b/filelist.json @@ -2337,6 +2337,11 @@ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp" - ] + ], + "ckw": { + "cl": [ + "src/core/CL/ckw/KernelArgumentsHelpers.cpp" + ] + } } } diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 3e98e85ad3..56b33f3922 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -28,7 +28,8 @@ import re import sys def get_list_includes(): - return "src/cpu/kernels/assembly " \ + return "compute_kernel_writer/include " \ + "src/cpu/kernels/assembly " \ "src/core/NEON/kernels/assembly " \ "src/core/NEON/kernels/convolution/winograd " \ "include/linux include " \ diff --git a/src/core/CL/CLUtils.cpp b/src/core/CL/CLUtils.cpp index 84cf88e099..709f8fa971 100644 --- a/src/core/CL/CLUtils.cpp +++ b/src/core/CL/CLUtils.cpp @@ -22,8 +22,8 @@ * SOFTWARE. 
*/ #include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Types.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/Validate.h" #include "support/StringSupport.h" @@ -32,8 +32,40 @@ namespace arm_compute { -cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType type) +cl::Image2D create_image2d_from_tensor(const ICLTensor *tensor, CLImage2DType image_type) { + ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); + + const cl::Context &ctx = CLKernelLibrary::get().context(); + const cl::Buffer &buffer = tensor->cl_buffer(); + const ITensorInfo *info = tensor->info(); + ARM_COMPUTE_ERROR_ON_MSG(info->lock_paddings(), + "Tensor paddings must not be locked to allow extending paddings to satisfy cl_image pitch alignment requirement"); + + const size_t image_w{ info->dimension(0) / 4 }; + const size_t image_h{ info->tensor_shape().total_size() / info->dimension(0) }; + const size_t max_image_w{ CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>() }; + const size_t max_image_h{ CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>() }; + + ARM_COMPUTE_UNUSED(max_image_w, max_image_h); + ARM_COMPUTE_ERROR_ON_MSG(image_w > max_image_w, "Image width exceeds maximum width for exporting to cl_image"); + ARM_COMPUTE_ERROR_ON_MSG(image_h > max_image_h, "Image height exceeds maximum height for exporting to cl_image"); + + const TensorShape shape2d(image_w, image_h); + const size_t image_row_pitch = info->strides_in_bytes()[1]; + + return create_image2d_from_buffer(ctx, buffer, shape2d, info->data_type(), image_row_pitch, image_type); +} + +cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer &buffer, const TensorShape &shape2d, DataType data_type, size_t image_row_pitch, CLImage2DType image_type) +{ +
ARM_COMPUTE_ERROR_ON_MSG(!image2d_from_buffer_supported(CLKernelLibrary::get().get_device()), + "The extension cl_khr_image2d_from_buffer is not supported on the target platform"); + ARM_COMPUTE_ERROR_ON_MSG(get_cl_image_pitch_alignment(CLKernelLibrary::get().get_device()) == 0, + "Impossible to retrieve the cl_image pitch alignment"); + ARM_COMPUTE_ERROR_ON_MSG(buffer.get() == nullptr, + "Cannot create cl_image from empty cl_buffer"); + cl_channel_type cl_data_type; switch(data_type) @@ -61,7 +93,7 @@ cl::Image2D create_image2d_from_buffer(const cl::Context &ctx, const cl::Buffer desc.image_width = shape2d[0]; desc.image_height = shape2d[1]; - switch(type) + switch(image_type) { case CLImage2DType::ReadOnly: cl_image = clCreateImage(ctx(), CL_MEM_READ_ONLY, &format, &desc, nullptr, &err); diff --git a/src/core/CL/CLUtils.h b/src/core/CL/CLUtils.h index b31944c72f..35dbee723e 100644 --- a/src/core/CL/CLUtils.h +++ b/src/core/CL/CLUtils.h @@ -33,6 +33,7 @@ namespace arm_compute class TensorShape; class CLBuildOptions; class ITensorInfo; +class ICLTensor; /** OpenCL Image2D types */ enum class CLImage2DType @@ -41,6 +42,15 @@ enum class CLImage2DType WriteOnly }; +/** Create a cl::Image2D object from a tensor + * + * @param[in] tensor Tensor from which to construct Image 2D object + * @param[in] image_type Image 2D type (@ref CLImage2DType) + * + * @return cl::Image2D object + */ +cl::Image2D create_image2d_from_tensor(const ICLTensor *tensor, CLImage2DType image_type); + /** Create a cl::Image2D object from an OpenCL buffer * * @note The following conditions are required to create a OpenCL image object from OpenCL buffer, diff --git a/src/core/CL/ckw/KernelArgumentsHelpers.cpp b/src/core/CL/ckw/KernelArgumentsHelpers.cpp new file mode 100644 index 0000000000..3fbdc46a3a --- /dev/null +++ b/src/core/CL/ckw/KernelArgumentsHelpers.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "KernelArgumentsHelpers.h" + +namespace arm_compute +{ + +void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponent component) +{ + ARM_COMPUTE_ERROR_ON(tensor == nullptr); + + const auto *info = tensor->info(); + const auto &strides = info->strides_in_bytes(); + + switch(component) + { + case ckw::TensorComponent::OffsetFirstElement: + kernel.setArg(idx++, info->offset_first_element_in_bytes()); + break; + case ckw::TensorComponent::Stride0: + kernel.setArg(idx++, strides[0]); + break; + case ckw::TensorComponent::Stride1: + kernel.setArg(idx++, strides[1]); + break; + case ckw::TensorComponent::Stride2: + kernel.setArg(idx++, strides[2]); + break; + case ckw::TensorComponent::Stride3: + kernel.setArg(idx++, strides[3]); + break; + case ckw::TensorComponent::Stride4: + kernel.setArg(idx++, strides[4]); + break; + case ckw::TensorComponent::Dim0: + kernel.setArg(idx++, info->dimension(0)); + break; + case ckw::TensorComponent::Dim1: + kernel.setArg(idx++, info->dimension(1)); + break; + case ckw::TensorComponent::Dim2: + kernel.setArg(idx++, info->dimension(2)); + break; + case ckw::TensorComponent::Dim3: + kernel.setArg(idx++, info->dimension(3)); + break; + case ckw::TensorComponent::Dim4: + kernel.setArg(idx++, info->dimension(4)); + break; + case ckw::TensorComponent::Dim1xDim2: + kernel.setArg(idx++, info->dimension(1) * info->dimension(2)); + break; + case ckw::TensorComponent::Dim2xDim3: + kernel.setArg(idx++, info->dimension(2) * info->dimension(3)); + break; + case ckw::TensorComponent::Dim1xDim2xDim3: + kernel.setArg(idx++, info->dimension(1) * info->dimension(2) * info->dimension(3)); + break; + case ckw::TensorComponent::Unknown: + default: + ARM_COMPUTE_ERROR("Unknown tensor component"); + } +} + +void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buffer &buffer) +{ + kernel.setArg(idx++, buffer); +} + +void cl_add_texture_argument(cl::Kernel &kernel, 
unsigned int &idx, const cl::Image &image) +{ + kernel.setArg(idx++, image); +} + +} // namespace arm_compute diff --git a/src/core/CL/ckw/KernelArgumentsHelpers.h b/src/core/CL/ckw/KernelArgumentsHelpers.h new file mode 100644 index 0000000000..b681636c26 --- /dev/null +++ b/src/core/CL/ckw/KernelArgumentsHelpers.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H +#define ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H + +#include "arm_compute/core/CL/ICLTensor.h" + +#include "ckw/TensorInfo.h" + +namespace arm_compute +{ + +/** Select a Compute Kernel Writer tensor component from a tensor and add to the kernel's arguments at the specified index idx. + * + * @param[in,out] kernel OpenCL kernel to configure with the provided argument. + * @param[in,out] idx Index at which to add the argument. 
+ * @param[in] tensor Tensor from which to access the tensor component. + * @param[in] component Tensor component to select such as tensor dimensions, strides, etc. + */ +void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, ICLTensor *tensor, ckw::TensorComponent component); + +/** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx. + * + * @param[in,out] kernel OpenCL kernel to configure with the provided argument. + * @param[in,out] idx Index at which to add the argument. + * @param[in] buffer OpenCL buffer containing the tensor's data. + */ +void cl_add_buffer_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Buffer &buffer); + +/** Add an OpenCL image object to the kernel's arguments at the specified index @p idx. + * + * @param[in,out] kernel OpenCL kernel to configure with the provided argument. + * @param[in,out] idx Index at which to add the argument. + * @param[in] image OpenCL image containing the image's data. + */ +void cl_add_texture_argument(cl::Kernel &kernel, unsigned int &idx, const cl::Image &image); + +} // namespace arm_compute + +#endif //ARM_COMPUTE_SRC_CKW_KERNELARGUMENTSHELPERS_H diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index 68d7e30c9b..f01341c7b5 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -431,35 +431,20 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl if(_export_weights_to_cl_image) { - const size_t image_w = weights->info()->dimension(0) / 4; - const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3); - const TensorShape shape2d(image_w, image_h); - const size_t image_row_pitch = weights->info()->strides_in_bytes()[1]; - - // Export cl_buffer to cl_image - weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), 
weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); + // Export tensor to cl_image + weights_cl_image = create_image2d_from_tensor(weights, CLImage2DType::ReadOnly); } if(_export_output_to_cl_image) { - const size_t image_w = dst->info()->dimension(0) / 4; - const size_t image_h = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3); - const TensorShape shape2d(image_w, image_h); - const size_t image_row_pitch = dst->info()->strides_in_bytes()[1]; - - // Export cl_buffer to cl_image - output_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(), shape2d, dst->info()->data_type(), image_row_pitch, CLImage2DType::WriteOnly); + // Export tensor to cl_image + output_cl_image = create_image2d_from_tensor(dst, CLImage2DType::WriteOnly); } if(_export_input_to_cl_image) { - const size_t image_w = src->info()->dimension(0) / 4; - const size_t image_h = src->info()->dimension(1) * src->info()->dimension(2) * src->info()->dimension(3); - const TensorShape shape2d(image_w, image_h); - const size_t image_row_pitch = src->info()->strides_in_bytes()[1]; - - // Export cl_buffer to cl_image - input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), src->cl_buffer(), shape2d, src->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); + // Export tensor to cl_image + input_cl_image = create_image2d_from_tensor(src, CLImage2DType::ReadOnly); } unsigned int idx = 0; -- cgit v1.2.1