diff options
Diffstat (limited to 'src/core/CL/kernels/CLSoftmaxLayerKernel.cpp')
-rw-r--r-- | src/core/CL/kernels/CLSoftmaxLayerKernel.cpp | 12 |
1 file changed, 6 insertions, 6 deletions
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp index b9ebdc9583..403256baae 100644 --- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp +++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp @@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options()); - _lws_hint = cl::NullRange; + cl::NDRange lws_hint(cl::NullRange); std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") : std::string("softmax_layer_max_shift_exp_sum_serial"); ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size); @@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE"); // Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run(). // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0]. - _lws_hint = cl::NDRange(_grid_size); + lws_hint = cl::NDRange(_grid_size); } // Create kernel. 
@@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor // Configure window auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second, lws_hint); } Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum) @@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _max, slice); add_3D_tensor_argument(idx, _output, slice); add_3D_tensor_argument(idx, _sum, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } @@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su // Configure window auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info()); ARM_COMPUTE_ERROR_THROW_ON(win_config.first); - ICLKernel::configure(win_config.second); + ICLKernel::configure_internal(win_config.second); } Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output) @@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue) add_3D_tensor_argument(idx, _input, slice); add_3D_tensor_argument(idx, _sum, sum_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice, _lws_hint); + enqueue(queue, *this, slice, lws_hint()); } while(window_collapsed.slide_window_slice_3D(slice)); } |