aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2017-11-27 11:18:34 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:41:17 +0000
commit 11f099921570422e8ee1c02ecfda44201ceec092 (patch)
tree 334e0cf7079f68f399a22047869788b49f9c81ba
parent 27066c2bed8fb88843308a70f375fd49835edd55 (diff)
download ComputeLibrary-11f099921570422e8ee1c02ecfda44201ceec092.tar.gz
COMPMID-556: Fix lws_hint CLSoftmaxLayer
Change-Id: I15347bcfc68d76d47efa06b6036e1ed81548d227
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110678
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
-rw-r--r-- src/core/CL/ICLSimple3DKernel.cpp 2
-rw-r--r-- src/core/CL/kernels/CLSoftmaxLayerKernel.cpp 9
2 files changed, 6 insertions, 5 deletions
diff --git a/src/core/CL/ICLSimple3DKernel.cpp b/src/core/CL/ICLSimple3DKernel.cpp
index 7b0d011b3e..0bd9d155cf 100644
--- a/src/core/CL/ICLSimple3DKernel.cpp
+++ b/src/core/CL/ICLSimple3DKernel.cpp
@@ -41,7 +41,7 @@ void ICLSimple3DKernel::run(const Window &window, cl::CommandQueue &queue)
unsigned int idx = 0;
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, _lws_hint);
}
while(window.slide_window_slice_3D(slice));
}
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index 53a78f7c99..5d71424189 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -288,7 +288,7 @@ void CLLogits1DShiftExpSumKernel::run(const Window &window, cl::CommandQueue &qu
add_3D_tensor_argument(idx, _max, slice);
add_3D_tensor_argument(idx, _output, slice);
add_3D_tensor_argument(idx, _sum, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, _lws_hint);
}
while(window_collapsed.slide_window_slice_3D(slice));
}
@@ -334,8 +334,6 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option_if(is_data_type_fixed_point(dt) && (beta != 1.0f), "-DBETA=" + support::cpp11::to_string(beta_int));
build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
- // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
- // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
_lws_hint = cl::NullRange;
std::string kernel_name = std::string("softmax_layer_max_shift_exp_sum_serial");
ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size);
@@ -355,6 +353,9 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
// Handle boundary conditions.
const unsigned int multiple_grid_size = (reduction_dim_size / vector_size) % _grid_size;
build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE");
+ // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
+ // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
+ _lws_hint = cl::NDRange(_grid_size);
}
// Create kernel.
@@ -548,7 +549,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _sum, sum_slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice);
+ enqueue(queue, *this, slice, _lws_hint);
}
while(window_collapsed.slide_window_slice_3D(slice));
}