From d11de9861e6c32fa389f503e037098f50ffed156 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 5 Sep 2022 15:35:35 +0100 Subject: Add a macro guard in all OpenCL kernels in gemmlowp.cl Resolves COMPMID-5498 Change-Id: I474f3f963257014255d082aab0ccbe3efe5aa067 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8222 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Comments-Addressed: Ramy Elgammal Reviewed-by: Ramy Elgammal Reviewed-by: Gunes Bayir Benchmark: Arm Jenkins --- src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp | 5 ++++- .../cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp | 5 ++++- .../ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp | 5 ++++- src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp | 5 ++++- .../ClGemmLowpOffsetContributionOutputStageKernel.cpp | 5 ++++- .../ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp | 8 ++++++-- .../ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp | 9 +++++++-- .../cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp | 9 +++++++-- src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp | 12 ++++++++++-- 9 files changed, 50 insertions(+), 13 deletions(-) (limited to 'src/gpu/cl') diff --git a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp index cb03c6255f..bad3d25d22 100644 --- a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -228,6 +228,9 @@ void ClGemmLowpMatrixMultiplyNativeKernel::configure(const CLCompileContext &com build_opts.add_option("-DPARTIAL_STORE_N0=" + support::cpp11::to_string(partial_store_n0)); std::string kernel_name("gemmlowp_mm_native"); + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); diff --git a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp index 6446b4ce38..0325c00a5c 100644 --- a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -193,6 +193,9 @@ void ClGemmLowpMatrixMultiplyReshapedKernel::configure(const CLCompileContext &c kernel_name += lhs_info.transpose ? "lhs_t_" : "lhs_nt_"; kernel_name += rhs_info.transpose ? "rhs_t" : "rhs_nt"; + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); diff --git a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp index bacf07fb4b..386c13eb92 100644 --- a/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -390,6 +390,9 @@ void ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel::configure(const CLCompileCon build_opts.add_option_if(max != max_val.get(), "-DMAX_BOUND=" + support::cpp11::to_string(max)); } + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); diff --git a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp index 5d2561d0dc..a8efd0610b 100644 --- a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -142,6 +142,9 @@ void ClGemmLowpOffsetContributionKernel::configure(const CLCompileContext &compi std::string kernel_name("gemmlowp_offset_contribution"); + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); diff --git a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp index a8a8207504..a1697254cc 100644 --- a/src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -187,6 +187,9 @@ void ClGemmLowpOffsetContributionOutputStageKernel::configure(const CLCompileCon std::string kernel_name("gemmlowp_offset_contribution"); kernel_name += "_" + string_from_gemmlowp_output_stage(output_stage.type); + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); diff --git a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp index c50023c3dd..795f3174a2 100644 --- a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -114,7 +114,11 @@ void ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel::configure(const CLCompi // Create kernel const std::string kernel_name = (info->output_data_type == DataType::QSYMM16) ? "gemmlowp_output_stage_quantize_down_fixedpoint_qsymm16" : "gemmlowp_output_stage_quantize_down_fixedpoint"; - _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); + + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window auto win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); diff --git a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp index c5cea3d17d..8d4cb923d6 100644 --- a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -113,8 +113,13 @@ void ClGemmLowpQuantizeDownInt32ScaleByFloatKernel::configure(const CLCompileCon build_opts.add_option_if((max < 255), "-DMAX_BOUND=" + support::cpp11::to_string(max)); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); + const std::string kernel_name = "gemmlowp_output_stage_quantize_down_float"; + + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel - _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down_float", build_opts.options()); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); diff --git a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp index 5469ea9602..bad9d961b8 100644 --- a/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -112,8 +112,13 @@ void ClGemmLowpQuantizeDownInt32ScaleKernel::configure(const CLCompileContext &c build_opts.add_option("-DOUTPUT_DATA_TYPE=" + get_cl_type_from_data_type(dst->data_type())); build_opts.add_option_if(bias != nullptr, "-DADD_BIAS"); + const std::string kernel_name = "gemmlowp_output_stage_quantize_down"; + + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel - _kernel = create_kernel(compile_context, "gemmlowp_output_stage_quantize_down", build_opts.options()); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window Window win = calculate_max_window(*src, Steps(num_elems_processed_per_iteration)); diff --git a/src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp b/src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp index 7f6f5731d8..6ab547cfd5 100644 --- a/src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp +++ b/src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -95,6 +95,9 @@ void ClGemmLowpMatrixAReductionKernel::configure(const CLCompileContext &compile std::string kernel_name = "gemmlowp_matrix_a_reduction" + std::string(is_dot8_supported ? "_dot8" : ""); + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); @@ -171,8 +174,13 @@ void ClGemmLowpMatrixBReductionKernel::configure(const CLCompileContext &compile build_opts.add_option("-DACC_DATA_TYPE=" + get_cl_dot8_acc_type_from_data_type(mtx_b->data_type())); build_opts.add_option_if(info.mul_by_scalar, "-DSCALAR=" + support::cpp11::to_string(info.scalar)); + const std::string kernel_name = "gemmlowp_matrix_b_reduction"; + + // A macro guard to compile ONLY the kernel of interest + build_opts.add_option("-D" + upper_string(kernel_name)); + // Create kernel - _kernel = create_kernel(compile_context, "gemmlowp_matrix_b_reduction", build_opts.options()); + _kernel = create_kernel(compile_context, kernel_name, build_opts.options()); // Configure kernel window Window win = calculate_max_window(*vector_sum_col, Steps(num_elems_processed_per_iteration)); -- cgit v1.2.1