aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
diff options
context:
space:
mode:
authorAnthony Barbier <anthony.barbier@arm.com>2018-08-08 13:20:04 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commitb6eb35371d222c6b7f61210d97ebd7dd9e197458 (patch)
treeaf89729ad68d665916c37abb5fd49e512fa40614 /src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
parent1d1f32ce7ef6acea4afd4cf6a929436640b72ccd (diff)
downloadComputeLibrary-b6eb35371d222c6b7f61210d97ebd7dd9e197458.tar.gz
COMPMID-1478: Stop relying on static default OpenCL objects in cl2.hpp
This causes problems when ACL is used as a shared library on Android. Fixes some problems related to creation / destruction order between the Graph's CL backend and core / runtime Change-Id: I716d63fd42f4586df1ffbb6fa97e4db06d3a781b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/143228 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLSoftmaxLayerKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLSoftmaxLayerKernel.cpp12
1 files changed, 6 insertions, 6 deletions
diff --git a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
index b9ebdc9583..403256baae 100644
--- a/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
+++ b/src/core/CL/kernels/CLSoftmaxLayerKernel.cpp
@@ -242,7 +242,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta));
build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(input->info()->quantization_info().scale, beta).options());
- _lws_hint = cl::NullRange;
+ cl::NDRange lws_hint(cl::NullRange);
std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("softmax_layer_max_shift_exp_sum_quantized_serial") :
std::string("softmax_layer_max_shift_exp_sum_serial");
ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size);
@@ -264,7 +264,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE");
// Setting _lws_hint in this way can also communicate grid_size to CLLogits1DMaxShiftExpSumKernel::run().
// A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0].
- _lws_hint = cl::NDRange(_grid_size);
+ lws_hint = cl::NDRange(_grid_size);
}
// Create kernel.
@@ -277,7 +277,7 @@ void CLLogits1DMaxShiftExpSumKernel::configure(const ICLTensor *input, ICLTensor
// Configure window
auto win_config = validate_and_configure_window_1DMaxShiftExpSum(input->info(), max->info(), output->info(), sum->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second, lws_hint);
}
Status CLLogits1DMaxShiftExpSumKernel::validate(const ITensorInfo *input, const ITensorInfo *max, const ITensorInfo *output, const ITensorInfo *sum)
@@ -322,7 +322,7 @@ void CLLogits1DMaxShiftExpSumKernel::run(const Window &window, cl::CommandQueue
add_3D_tensor_argument(idx, _max, slice);
add_3D_tensor_argument(idx, _output, slice);
add_3D_tensor_argument(idx, _sum, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}
@@ -365,7 +365,7 @@ void CLLogits1DNormKernel::configure(const ICLTensor *input, const ICLTensor *su
// Configure window
auto win_config = validate_and_configure_window_1DNorm(input->info(), output->info(), sum->info());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
- ICLKernel::configure(win_config.second);
+ ICLKernel::configure_internal(win_config.second);
}
Status CLLogits1DNormKernel::validate(const ITensorInfo *input, const ITensorInfo *sum, const ITensorInfo *output)
@@ -394,7 +394,7 @@ void CLLogits1DNormKernel::run(const Window &window, cl::CommandQueue &queue)
add_3D_tensor_argument(idx, _input, slice);
add_3D_tensor_argument(idx, _sum, sum_slice);
add_3D_tensor_argument(idx, _output, slice);
- enqueue(queue, *this, slice, _lws_hint);
+ enqueue(queue, *this, slice, lws_hint());
}
while(window_collapsed.slide_window_slice_3D(slice));
}