diff options
author | Adnan AlSinan <adnan.alsinan@arm.com> | 2021-09-29 16:50:46 +0100 |
---|---|---|
committer | Adnan AlSinan <adnan.alsinan@arm.com> | 2021-10-13 12:34:41 +0000 |
commit | b0608065aa3359b41ddc83dcb66adf489006fcd2 (patch) | |
tree | 3e53f556ce3ef23e8d5eebb74b5235b839fb290b /src/gpu/cl | |
parent | 5c002ec70aa20569d44a3e4c5bbcf53135364e7b (diff) | |
download | ComputeLibrary-b0608065aa3359b41ddc83dcb66adf489006fcd2.tar.gz |
Improve performance of Softmax uint8 on GPU
Resolves COMPMID-4805
Change-Id: I0acd4479f196cf9518995a60d3b57a9a49e0db57
Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6413
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Pablo Marquez Tello <pablo.tello@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Reviewed-by: Freddie Liardet <frederick.liardet@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/gpu/cl')
-rw-r--r-- | src/gpu/cl/kernels/ClSoftmaxKernel.cpp | 28 |
1 file changed, 6 insertions, 22 deletions
diff --git a/src/gpu/cl/kernels/ClSoftmaxKernel.cpp b/src/gpu/cl/kernels/ClSoftmaxKernel.cpp index 4c00413469..467bbe46ce 100644 --- a/src/gpu/cl/kernels/ClSoftmaxKernel.cpp +++ b/src/gpu/cl/kernels/ClSoftmaxKernel.cpp @@ -177,8 +177,7 @@ void ClLogits1DMaxShiftExpSumKernel::configure(const CLCompileContext &compile_c const auto is_signed_qasymm8 = is_data_type_quantized_asymmetric_signed(info.input_data_type); const int min_value = is_signed_qasymm8 ? CL_SCHAR_MIN : 0; - ParallelReductionInfo parallel_reduction_info = is_parallel_reduction(reduction_dim_size); - const unsigned int vector_size = adjust_vec_size(std::get<1>(parallel_reduction_info), reduction_dim_size); + const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size); // Set build options CLBuildOptions build_opts; @@ -193,29 +192,12 @@ void ClLogits1DMaxShiftExpSumKernel::configure(const CLCompileContext &compile_c build_opts.add_option_if(is_data_type_float(dt) && (beta != 1.0f), "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_option_if(is_data_type_float(dt) && info.is_log, "-DLOG_SOFTMAX"); build_opts.add_option_if(is_data_type_float(dt), "-DMINVAL=" + ((dt == DataType::F16) ? std::string("-HALF_MAX") : std::string("-FLT_MAX"))); + build_opts.add_option_if(is_data_type_quantized_asymmetric(dt), "-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option_if(is_data_type_quantized_asymmetric(dt), "-DBETA=" + float_to_string_with_full_precision(beta)); build_opts.add_options_if(is_data_type_quantized_asymmetric(dt), prepare_quantized_softmax_build_options(qinfo.scale, beta).options()); cl::NDRange lws_hint(cl::NullRange); - std::string kernel_name = std::string("softmax_layer_max_shift_exp_sum_") + (is_data_type_quantized_asymmetric(dt) ? 
"quantized_" : ""); - - // Configure parallel kernel if needed - if(std::get<0>(parallel_reduction_info)) - { - kernel_name += "parallel"; - bool is_grid_size_pow2 = (_grid_size != 0) && ((_grid_size & (_grid_size - 1)) == 0); - build_opts.add_option_if(is_grid_size_pow2 && _grid_size <= 256, "-DGRID_SIZE=" + support::cpp11::to_string(_grid_size)); - - // Handle boundary conditions. - const unsigned int multiple_grid_size = (reduction_dim_size / vector_size) % _grid_size; - build_opts.add_option_if((multiple_grid_size != 0) || ((reduction_dim_size % vector_size) != 0), "-DNON_MULTIPLE_OF_GRID_SIZE"); - // Setting _lws_hint in this way can also communicate grid_size to ClLogits1DMaxShiftExpSumKernel::run(). - // A single workgroup performs reduction in dimension 0 in the parallel case, hence lws[0]==gws[0]. - lws_hint = cl::NDRange(_grid_size); - } - else - { - kernel_name += "serial"; - } + std::string kernel_name = std::string("softmax_layer_max_shift_exp_sum_") + (is_data_type_quantized_asymmetric(dt) ? "quantized_" : "") + "serial"; // Create kernel. _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); @@ -313,6 +295,8 @@ void ClLogits1DNormKernel::configure(const CLCompileContext &compile_context, co build_opts.add_options_if(is_quantized_asymmetric, prepare_quantized_softmax_build_options(qinfo.scale, info.beta).options()); build_opts.add_option_if(info.is_log, "-DLOG_SOFTMAX"); + build_opts.add_option_if(is_quantized_asymmetric, "-DSCALE=" + float_to_string_with_full_precision(qinfo.scale)); + build_opts.add_option_if(is_quantized_asymmetric, "-DBETA=" + float_to_string_with_full_precision(info.beta)); // Create kernel std::string kernel_name = std::string("softmax_layer_norm") + (is_quantized_asymmetric ? "_quantized" : ""); |