From 11f099921570422e8ee1c02ecfda44201ceec092 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 27 Nov 2017 11:18:34 +0000
Subject: COMPMID-556: Fix lws_hint CLSoftmaxLayer

Change-Id: I15347bcfc68d76d47efa06b6036e1ed81548d227
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110678
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Gian Marco Iodice
---
 src/core/CL/ICLSimple3DKernel.cpp            | 2 +-
 src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 9 +++++----
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/src/core/CL/ICLSimple3DKernel.cpp b/src/core/CL/ICLSimple3DKernel.cpp
index 7b0d011b3e..0bd9d155cf 100644
--- a/src/core/CL/ICLSimple3DKernel.cpp
+++ b/src/core/CL/ICLSimple3DKernel.cpp
@@ -41,7 +41,7 @@ void ICLSimple3DKernel::run(const Window &window, cl::CommandQueue &queue)
         unsigned int idx = 0;
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice);
+        enqueue(queue, *this, slice, _lws_hint);
     }
     while(window.slide_window_slice_3D(slice));
 }
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 53a78f7c99..5d71424189 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -288,7 +288,7 @@ void CLLogits1DShiftExpSumKernel::run(const Window &window, cl::CommandQueue &qu
         add_3D_tensor_argument(idx, _max, slice);
         add_3D_tensor_argument(idx, _output, slice);
         add_3D_tensor_argument(idx, _sum, slice);
-        enqueue(queue, *this, slice);
+        enqueue(queue, *this, slice, _lws_hint);
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
@@ -334,8 +334,6 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
     build_opts.add_option_if(is_data_type_fixed_point(dt) && (beta != 1.0f), "-DBETA=" + support::cpp11::to_string(beta_int));
     build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
 
-    // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
-    // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
     _lws_hint = cl::NullRange;
     std::string kernel_name = std::string("softmax_layer_max_shift_exp_sum_serial");
     ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size);
@@ -355,6 +353,9 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
         // Handle boundary conditions.
         const unsigned int multiple_grid_size = (reduction_dim_size / vector_size) % _grid_size;
         build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE");
+        // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
+        // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
+        _lws_hint = cl::NDRange(_grid_size);
     }
 
     // Create kernel.
@@ -548,7 +549,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
         add_3D_tensor_argument(idx, _input, slice);
         add_3D_tensor_argument(idx, _sum, sum_slice);
         add_3D_tensor_argument(idx, _output, slice);
-        enqueue(queue, *this, slice);
+        enqueue(queue, *this, slice, _lws_hint);
     }
     while(window_collapsed.slide_window_slice_3D(slice));
 }
-- 
cgit v1.2.1