diff options
Diffstat (limited to 'src/gpu/cl/kernels/ClDirectConv2dKernel.cpp')
-rw-r--r-- | src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 67 |
1 files changed, 57 insertions, 10 deletions
diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index d9271e24d9..b66163c805 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -64,6 +64,9 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *weights, co ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->num_dimensions() > 4, "Weights can be at most 4 dimensional"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_input_to_cl_image == true, "Export to CLImage is not supported for the input tensor"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.export_output_to_cl_image == true, "Export to CLImage is not supported for the output tensor"); + if(data_layout == DataLayout::NCHW) { ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights->dimension(width_idx) != weights->dimension(height_idx), "Weights should have same width and height"); @@ -210,14 +213,26 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT const unsigned int pad_left = conv_info.pad_left(); const unsigned int pad_top = conv_info.pad_top(); - _export_to_cl_image = desc.export_weights_to_cl_image; + _export_weights_to_cl_image = desc.export_weights_to_cl_image; + _export_input_to_cl_image = desc.export_input_to_cl_image; + _export_output_to_cl_image = desc.export_output_to_cl_image; // Update the padding for the weights tensor if we can export to cl_image - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { gemm::update_padding_for_cl_image(weights); } + if(_export_output_to_cl_image) + { + gemm::update_padding_for_cl_image(dst); + } + + if(_export_input_to_cl_image) + { + gemm::update_padding_for_cl_image(src); + } + if(biases != nullptr) { build_options.add_option(std::string("-DHAS_BIAS")); @@ -241,7 +256,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-cl-fast-relaxed-math"); } - build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_input_to_cl_image, "-DSRC_TENSOR_TYPE=IMAGE", "-DSRC_TENSOR_TYPE=BUFFER"); build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(0))); build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(1))); @@ -249,9 +264,9 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(0))); build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(1))); build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(2))); - build_options.add_option("-DDST_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_output_to_cl_image, "-DDST_TENSOR_TYPE=IMAGE", "-DDST_TENSOR_TYPE=BUFFER"); build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type)); - build_options.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); + build_options.add_option_if_else(_export_weights_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx))); build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx))); build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type())); @@ -307,7 +322,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT } else { - _export_to_cl_image = false; + _export_weights_to_cl_image = false; kernel_name << "direct_convolution_nchw"; build_options.add_option_if(biases != nullptr, std::string("-DHAS_BIAS")); @@ -399,8 +414,10 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl if(_data_layout == DataLayout::NHWC) { cl::Image2D weights_cl_image; + cl::Image2D output_cl_image; + cl::Image2D input_cl_image; - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { const size_t image_w = weights->info()->dimension(0) / 4; const size_t image_h = weights->info()->dimension(1) * weights->info()->dimension(2) * weights->info()->dimension(3); @@ -408,13 +425,43 @@ void ClDirectConv2dKernel::run_op(ITensorPack &tensors, const Window &window, cl const size_t image_row_pitch = weights->info()->strides_in_bytes()[1]; // Export cl_buffer to cl_image - weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch); + weights_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), weights->cl_buffer(), shape2d, weights->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); + } + + if(_export_output_to_cl_image) + { + const size_t image_w = dst->info()->dimension(0) / 4; + const size_t image_h = dst->info()->dimension(1) * dst->info()->dimension(2) * dst->info()->dimension(3); + const TensorShape shape2d(image_w, image_h); + const size_t image_row_pitch = dst->info()->strides_in_bytes()[1]; + + // Export cl_buffer to cl_image + output_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), dst->cl_buffer(), shape2d, dst->info()->data_type(), image_row_pitch, CLImage2DType::WriteOnly); + } + + if(_export_input_to_cl_image) + { + const size_t image_w = src->info()->dimension(0) / 4; + const size_t image_h = src->info()->dimension(1) * src->info()->dimension(2) * src->info()->dimension(3); + const TensorShape shape2d(image_w, image_h); + const size_t image_row_pitch = src->info()->strides_in_bytes()[1]; + + // Export cl_buffer to cl_image + input_cl_image = create_image2d_from_buffer(CLKernelLibrary::get().context(), src->cl_buffer(), shape2d, src->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); } unsigned int idx = 0; + if(_export_input_to_cl_image) + { + _kernel.setArg(idx++, input_cl_image); + } add_4d_tensor_nhwc_argument(idx, src); + if(_export_output_to_cl_image) + { + _kernel.setArg(idx++, output_cl_image); + } add_4d_tensor_nhwc_argument(idx, dst); - if(_export_to_cl_image) + if(_export_weights_to_cl_image) { _kernel.setArg(idx++, weights_cl_image); } |