From 17975a61c5d7cbdc37c11d38e23eab8afa43f27c Mon Sep 17 00:00:00 2001 From: Adnan AlSinan Date: Mon, 8 Nov 2021 17:46:39 +0000 Subject: Improve start-up time for ClScale - Add macro guard for different kernels in scale.cl - Rework TENSOR4D to the new format - Pass scale_x and scale_y at runtime Resolves COMPMID-4886 Signed-off-by: Adnan AlSinan Change-Id: Ib904a703d511fb8260618057ac92e5ea9efeee2b Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6619 Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/gpu/cl/kernels/ClScaleKernel.cpp | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'src/gpu/cl/kernels/ClScaleKernel.cpp') diff --git a/src/gpu/cl/kernels/ClScaleKernel.cpp b/src/gpu/cl/kernels/ClScaleKernel.cpp index d63c0e1754..6f16adc657 100644 --- a/src/gpu/cl/kernels/ClScaleKernel.cpp +++ b/src/gpu/cl/kernels/ClScaleKernel.cpp @@ -117,9 +117,7 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn const int idx_channel = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::CHANNEL); const unsigned int src_width = src->dimension(idx_width); const unsigned int src_height = src->dimension(idx_height); - const unsigned int src_channel = src->dimension(idx_channel); const unsigned int dst_width = dst->dimension(idx_width); - const unsigned int dst_height = dst->dimension(idx_height); const unsigned int dst_channels = dst->dimension(idx_channel); unsigned int vec_size = 0; unsigned int vec_size_leftover = 0; @@ -130,20 +128,13 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn vec_size = adjust_vec_size(src->data_type() == DataType::F32 ? 4 : 8, dst_channels); vec_size_leftover = dst_channels % vec_size; build_opts.add_option("-DSRC_TENSOR_TYPE=BUFFER"); - build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src_width)); - build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src_height)); - build_opts.add_option("-DSRC_CHANNELS=" + support::cpp11::to_string(src_channel)); build_opts.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_opts.add_option("-DDST_TENSOR_TYPE=BUFFER"); - build_opts.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst_width)); - build_opts.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst_height)); - build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(dst_channels)); build_opts.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst->data_type())); build_opts.add_option("-DCONSTANT_VALUE=" + string_from_pixel_value(info.constant_border_value, src->data_type())); - build_opts.add_option("-DSCALE_X=" + float_to_string_with_full_precision(scale_x)); - build_opts.add_option("-DSCALE_Y=" + float_to_string_with_full_precision(scale_y)); build_opts.add_option("-DN0=" + support::cpp11::to_string(vec_size)); build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(vec_size_leftover)); + build_opts.add_option("-DSCALE_" + string_from_interpolation_policy(interpolation_policy_to_use)); build_opts.add_option_if(src->num_dimensions() > 3, "-DBATCHED_EXECUTION"); build_opts.add_option_if(info.border_mode == BorderMode::REPLICATE, "-DBORDER_MODE_REPLICATE"); build_opts.add_option_if(info.border_mode == BorderMode::CONSTANT, "-DBORDER_MODE_CONSTANT"); @@ -203,6 +194,13 @@ void ClScaleKernel::configure(const CLCompileContext &compile_context, ITensorIn ARM_COMPUTE_ERROR_ON(has_padding_changed(padding_info)); + // Pass scale kernel arguments + if(is_nhwc) + { + unsigned int idx = 2 * num_arguments_per_4d_tensor_nhwc(); + _kernel.setArg(idx++, scale_x); + _kernel.setArg(idx++, scale_y); + } // Set config_id for enabling LWS tuning _config_id = "scale_"; _config_id += (info.border_mode == BorderMode::REPLICATE ? "Bord_rep" : ""); @@ -248,8 +246,8 @@ void ClScaleKernel::run_op(ITensorPack &tensors, const Window &window, cl::Comma Window slice = collapsed.first_slice_window_4D(); unsigned int idx = 0; - add_4D_tensor_argument(idx, src, slice); - add_4D_tensor_argument(idx, dst, slice); + add_4d_tensor_nhwc_argument(idx, src); + add_4d_tensor_nhwc_argument(idx, dst); enqueue(queue, *this, slice, lws_hint()); break; } -- cgit v1.2.1