diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2022-12-30 16:07:45 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2023-01-10 09:57:51 +0000 |
commit | 3cce35dcad8bc8f53a1e6613f719af9ab04feda6 (patch) | |
tree | e1015566852ebce4af897db37cf5cb1989c29924 /src/gpu | |
parent | d2d9361a0a338bce478f7d85b4af70d1ed20f26c (diff) | |
download | ComputeLibrary-3cce35dcad8bc8f53a1e6613f719af9ab04feda6.tar.gz |
Extend cl image support to input and output tensors
- Add support for texture image to input and output of direct
convolution
- Extend T_LOAD2D_INDIRECT macro to read values from cl image storages
Resolves COMPMID-5715
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Idb0410f53f6d0763cd9e39895a7cbf9bc826d33a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8904
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu')
6 files changed, 69 insertions, 20 deletions
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index d9271e24d9..b66163c805 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,9 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_input_to_cl_image == true, "Export to CLImage is not supported for the input tensor"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_output_to_cl_image == true, "Export to CLImage is not supported for the output tensor"); + if(data_layout == DataLayout::NCHW) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx), "Weights should have same width and height"); @@ -210,14 +213,26 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT const unsigned int pad_left = conv_info.pad_left(); const unsigned int pad_top = conv_info.pad_top(); - _export_to_cl_image = desc.export_weights_to_cl_image; + _export_weights_to_cl_image = desc.export_weights_to_cl_image; + _export_input_to_cl_image = desc.export_input_to_cl_image; + _export_output_to_cl_image = desc.export_output_to_cl_image; // Update the padding for the weights tensor if we can export to cl_image - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { gemm::update_padding_for_cl_image(weights); } + if(_export_output_to_cl_image) + { + gemm::update_padding_for_cl_image(dst); + } + + if(_export_input_to_cl_image) + { + gemm::update_padding_for_cl_image(src); + } + if(biases != nullptr) { build_options.add_option(std::string("-DHAS_BIAS")); @@ -241,7 +256,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-cl-fast-relaxed-math"); } - build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_input_to_cl_image, "-DSRC_TENSOR_TYPE=IMAGE", "-DSRC_TENSOR_TYPE=BUFFER"); build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(0))); build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(1))); @@ -249,9 +264,9 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(0))); build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(1))); build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(2))); - build_options.add_option("-DDST_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_output_to_cl_image, "-DDST_TENSOR_TYPE=IMAGE", "-DDST_TENSOR_TYPE=BUFFER"); build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type)); - build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_weights_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx))); build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx))); build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type())); @@ -307,7 +322,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT } else { - _export_to_cl_image = false; + _export_weights_to_cl_image = false; kernel_name << "direct_convolution_nchw"; build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS")); @@ -399,8 +414,10 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl if(_data_layout == DataLayout::NHWC) { cl::Image2D weights_cl_image; + cl::Image2D output_cl_image; + cl::Image2D input_cl_image; - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { const size_t image_w = weights->info()->dimension(0) / 4; const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3); @@ -408,13 +425,43 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl const size_t image_row_pitch = weights->info()->strides_in_bytes()[1]; // Export cl_buffer to cl_image - weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch); + weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); + } + + if(_export_output_to_cl_image) + { + const size_t image_w = dst->info()->dimension(0) / 4; + const size_t image_h = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3); + const TensorShape shape2d(image_w, image_h); + const size_t image_row_pitch = dst->info()->strides_in_bytes()[1]; + + // Export cl_buffer to cl_image + output_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(), shape2d, dst->info()->data_type(), image_row_pitch, CLImage2DType::WriteOnly); + } + + if(_export_input_to_cl_image) + { + const size_t image_w = src->info()->dimension(0) / 4; + const size_t image_h = src->info()->dimension(1) * src->info()->dimension(2) * src->info()->dimension(3); + const TensorShape shape2d(image_w, image_h); + const size_t image_row_pitch = src->info()->strides_in_bytes()[1]; + + // Export cl_buffer to cl_image + input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), src->cl_buffer(), shape2d, src->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } unsigned int idx = 0; + if(_export_input_to_cl_image) + { + _kernel.setArg(idx++, input_cl_image); + } add_4d_tensor_nhwc_argument(idx, src); + if(_export_output_to_cl_image) + { + _kernel.setArg(idx++, output_cl_image); + } add_4d_tensor_nhwc_argument(idx, dst); - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { _kernel.setArg(idx++, weights_cl_image); } diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.h b/src/gpu/cl/kernels/ClDirectConv2dKernel.h index 0cb8aebbe1..25171a0536 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.h +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -84,7 +84,9 @@ public: public: DataLayout _data_layout{}; PadStrideInfo _conv_info{}; - bool _export_to_cl_image{ false }; + bool _export_weights_to_cl_image{ false }; + bool _export_output_to_cl_image{ false }; + bool _export_input_to_cl_image{ false }; }; } // namespace kernels } // namespace opencl diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp index 6a450b652b..f74a5d87af 100644 --- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -360,7 +360,7 @@ void ClGemmMatrixMultiplyReshapedKernel::run_op(ITensorPack &tensors, const Wind const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2)); const size_t image_row_pitch = src1->info()->strides_in_bytes()[1]; - src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch); + src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } do diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp index a8bcf8d6a1..efd0a95eab 100644 --- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022 Arm Limited. + * Copyright (c) 2019-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -375,7 +375,7 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsKernel::run_op(ITensorPack &tensors, con const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2)); const size_t image_row_pitch = src1->info()->strides_in_bytes()[1]; - src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch); + src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } do diff --git a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp index fe46913517..f252afb06a 100644 --- a/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -325,7 +325,7 @@ void ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel::run_op(ITensorPack &tensors, const TensorShape shape2d(src1->info()->dimension(0) / 4, src1->info()->dimension(1) * src1->info()->dimension(2)); const size_t image_row_pitch = src1->info()->strides_in_bytes()[1]; - src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch); + src1_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), src1->cl_buffer(), shape2d, src1->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } Window slice = window.first_slice_window_3D(); diff --git a/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp index 3448377cb5..b8915cc406 100644 --- a/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClIndirectConv2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -282,7 +282,7 @@ void ClIndirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, const size_t image_row_pitch = weights->info()->strides_in_bytes()[1]; // Export cl_buffer to cl_image - weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch); + weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } unsigned int idx = 0; |