aboutsummaryrefslogtreecommitdiff
path: root/src/core/gpu
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-03-19 11:26:20 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2021-03-23 14:16:34 +0000
commit5c9eed82102b29381cfcf36a0e98d3c5239f37e5 (patch)
treefdfcbe642f4d7896e477583d74f81e7e30271465 /src/core/gpu
parent1efed925da927cc47bff6327c66f252b65c660bc (diff)
downloadComputeLibrary-5c9eed82102b29381cfcf36a0e98d3c5239f37e5.tar.gz
Extend direct convolution (F32/F16/QASYMM8)
The new function can handle different block sizes (M0, N0) New utility macros have been developed to simplify the work and the future OpenCL kernel development. In particular the work has been done to also consider cases with: - the texture pipe support - dynamic tensor shape support Change-Id: Ife4c64baf07517938bb8ad18e6a5f4579345c40f Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5297 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/gpu')
-rw-r--r--src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp27
1 files changed, 17 insertions, 10 deletions
diff --git a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
index f071dbc468..72801fa6c8 100644
--- a/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClDirectConvolutionKernel.cpp
@@ -276,10 +276,12 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *src, ITenso
if(data_layout == DataLayout::NHWC)
{
const unsigned int vec_size = std::min(static_cast<unsigned int>(dst->tensor_shape()[0]), 4u);
+ const unsigned int num_rows = dst->tensor_shape()[0] > 16 ? 2u : 1U;
// Create window and update padding
- Window win = calculate_max_window(*dst, Steps(vec_size, 1U));
+ Window win = calculate_max_window(*dst, Steps(vec_size, num_rows));
dst->set_valid_region(ValidRegion(Coordinates(), dst->tensor_shape()));
+
Status err = Status{};
return std::make_pair(err, win);
}
@@ -368,9 +370,9 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex
const unsigned int n0 = win_config.second.x().step();
const unsigned int m0 = win_config.second.y().step();
- const unsigned int k0 = adjust_vec_size(16u, src->dimension(channel_idx));
+
+ const unsigned int k0 = adjust_vec_size(8u, src->dimension(channel_idx));
const unsigned int partial_store_n0 = dst->dimension(channel_idx) % n0;
- const unsigned int partial_store_m0 = (dst->dimension(width_idx) * dst->dimension(height_idx)) % m0;
const unsigned int pad_left = conv_info.pad_left();
const unsigned int pad_top = conv_info.pad_top();
@@ -379,14 +381,19 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex
build_options.add_option(std::string("-DHAS_BIAS"));
build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->data_type())));
}
+
+ build_options.add_option("-cl-fast-relaxed-math");
+ build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER");
build_options.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(width_idx)));
build_options.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(height_idx)));
build_options.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src->dimension(channel_idx)));
build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+ build_options.add_option("-DDST_TENSOR_TYPE=BUFFER");
build_options.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(width_idx)));
build_options.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(height_idx)));
build_options.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst->dimension(channel_idx)));
build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst->data_type()));
+ build_options.add_option("-DWEI_TENSOR_TYPE=BUFFER");
build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx)));
build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx)));
build_options.add_option("-DWEI_DATA_TYPE=" + get_cl_type_from_data_type(weights->data_type()));
@@ -397,8 +404,7 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex
build_options.add_option("-DN0=" + support::cpp11::to_string(n0));
build_options.add_option("-DM0=" + support::cpp11::to_string(m0));
build_options.add_option("-DK0=" + support::cpp11::to_string(k0));
- build_options.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0));
- build_options.add_option("-DPARTIAL_STORE_M0=" + support::cpp11::to_string(partial_store_m0));
+ build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
if(is_data_type_quantized(data_type))
{
@@ -426,6 +432,7 @@ void ClDirectConvolutionKernel::configure(const CLCompileContext &compile_contex
else
{
build_options.add_option("-DACC_DATA_TYPE=" + get_cl_type_from_data_type(data_type));
+ build_options.add_option("-DZERO_VALUE=" + support::cpp11::to_string(0));
build_options.add_option("-DSRC_OFFSET=" + support::cpp11::to_string(0));
build_options.add_option("-DWEI_OFFSET=" + support::cpp11::to_string(0));
build_options.add_option("-DDST_OFFSET=" + support::cpp11::to_string(0));
@@ -529,18 +536,18 @@ void ClDirectConvolutionKernel::run_op(ITensorPack &tensors, const Window &windo
if(_data_layout == DataLayout::NHWC)
{
- slice.set(Window::DimY, Window::Dimension(0, dst->info()->dimension(1) * dst->info()->dimension(2), 1));
+ const size_t dim_y_collapsed = ceil_to_multiple(dst->info()->dimension(1) * dst->info()->dimension(2), slice.y().step());
+ slice.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, slice.y().step()));
slice.set(Window::DimZ, Window::Dimension(0, dst->info()->dimension(3), 1));
unsigned int idx = 0;
- add_3D_tensor_argument(idx, src, slice);
- add_3D_tensor_argument(idx, dst, slice);
- add_3D_tensor_argument(idx, weights, slice);
+ add_4D_tensor_argument(idx, src, slice);
+ add_4D_tensor_argument(idx, dst, slice);
+ add_4D_tensor_argument(idx, weights, slice);
if(biases != nullptr)
{
add_1D_tensor_argument(idx, biases, slice);
}
- _kernel.setArg(idx++, static_cast<unsigned int>(weights->info()->strides_in_bytes()[3]));
enqueue(queue, *this, slice, lws_hint());
}
else