From 2a86a30a5d9c047b0ec73a699b09a128f3fcb55e Mon Sep 17 00:00:00 2001 From: ramelg01 Date: Fri, 4 Feb 2022 20:49:14 +0000 Subject: Improve start-up time for winograd_input_transform_*_nhwc - pass tensor's dimensions at runtime rather than compile time - Add guard macro to compile only kernel(s) of interest Resolves: COMPMID-5119 Signed-off-by: Ramy Elgammal Change-Id: Ib01098e397011a1201c2800c62a8954ec70e63e8 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7083 Tested-by: Arm Jenkins Reviewed-by: Giorgio Arena Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp | 16 ++++++++++------ src/gpu/cl/kernels/ClWinogradInputTransformKernel.h | 4 +++- 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'src/gpu/cl') diff --git a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp index 58874216bb..d6b038f0f8 100644 --- a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp +++ b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -147,10 +147,8 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c if(_data_layout == DataLayout::NHWC) { build_opts.add_option("-DNHWC"); - build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(src->dimension(idx_w))); - build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(idx_h))); - build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(_num_tiles_x)); - build_opts.add_option("-DNUM_TILES_Y=" + support::cpp11::to_string(_num_tiles_y)); + _src_width = src->dimension(idx_w); + _src_height = src->dimension(idx_h); build_opts.add_option("-DPAD_LEFT=" + support::cpp11::to_string(conv_info.pad_left())); build_opts.add_option("-DPAD_TOP=" + support::cpp11::to_string(conv_info.pad_top())); build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width)); @@ -189,6 +187,8 @@ void ClWinogradInputTransformKernel::configure(const ClCompileContext &compile_c kernel_name += support::cpp11::to_string(_step_z); kernel_name += "_" + lower_string(string_from_data_layout(_data_layout)); + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Create window and update padding @@ -247,6 +247,10 @@ void ClWinogradInputTransformKernel::run_op(ITensorPack &tensors, const Window & unsigned int idx = 0; add_4D_tensor_argument(idx, src, slice); add_4D_tensor_argument(idx, dst, slice); + _kernel.setArg(idx++, _src_width); + _kernel.setArg(idx++, _src_height); + _kernel.setArg(idx++, _num_tiles_x); + _kernel.setArg(idx++, _num_tiles_y); enqueue(queue, *this, slice, lws_hint()); } else @@ -275,4 +279,4 @@ void ClWinogradInputTransformKernel::run_op(ITensorPack &tensors, const Window & } } // namespace kernels } // namespace opencl -} // namespace arm_compute \ No newline at end of file +} // namespace 
arm_compute diff --git a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.h b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.h index 631f427b82..c10c528b9b 100644 --- a/src/gpu/cl/kernels/ClWinogradInputTransformKernel.h +++ b/src/gpu/cl/kernels/ClWinogradInputTransformKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -80,6 +80,8 @@ private: int _num_tiles_x{ 0 }; int _num_tiles_y{ 0 }; unsigned int _step_z{ 1 }; + int32_t _src_width{ 0 }; + int32_t _src_height{ 0 }; }; } // namespace kernels } // namespace opencl -- cgit v1.2.1