From fdb534293b84182a89a78b5086cf05c07ef970e8 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Wed, 25 May 2022 10:09:39 +0100 Subject: Disable unsafe FP optimizations causing accuracy issues Resolves: COMPMID-5324 Change-Id: I289b1bb42296c562cb90b918c20def8c6c1825d2 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7639 Reviewed-by: SiCong Li Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../CLDepthwiseConvolutionLayerNativeKernel.cpp | 25 ++++++++++++++++++---- src/gpu/cl/kernels/ClDirectConv2dKernel.cpp | 24 +++++++++++++++++---- src/gpu/cl/kernels/ClDirectConv3dKernel.cpp | 4 +++- 3 files changed, 44 insertions(+), 9 deletions(-) diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp index 61c8d90f78..d1f0338739 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -211,8 +211,25 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(weights->info()); } - build_opts.add_option("-cl-fast-relaxed-math"); - build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(conv_info.act_info.activation()))); + // Conditions of -cl-fast-relaxed-math causing accuracy issues can be traced from COMPMID-5324 + const GPUTarget gpu_target = get_target(); + const auto act_function = conv_info.act_info.activation(); + const auto dst_data_type = _output->info()->data_type(); + + if((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) + && (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (dst_data_type == DataType::F32 || dst_data_type == DataType::F16)) + { + // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations + // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations + build_opts.add_option("-cl-unsafe-math-optimizations"); + } + else + { + build_opts.add_option("-cl-fast-relaxed-math"); + } + + build_opts.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_function))); build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(conv_info.depth_multiplier)); build_opts.add_option("-DSRC_TENSOR_TYPE=BUFFER"); // Note: SRC_DATA_TYPE must have the same data type of WEI_DATA_TYPE. In quantized, we could @@ -220,7 +237,7 @@ void CLDepthwiseConvolutionLayerNativeKernel::configure(const CLCompileContext & // only works when both have same data type, we have to change the offset to take into account this aspect build_opts.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type())); build_opts.add_option("-DDST_TENSOR_TYPE=BUFFER"); - build_opts.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(_output->info()->data_type())); + build_opts.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type)); build_opts.add_option_if_else(_export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); build_opts.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(_weights->info()->dimension(1))); build_opts.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(_weights->info()->dimension(2))); diff --git a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp index ff8c2c32a0..be4c8ef5b7 100644 --- a/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -253,11 +253,27 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option(std::string("-DBIA_DATA_TYPE=" + get_cl_type_from_data_type(biases->data_type()))); } - build_options.add_option("-cl-fast-relaxed-math"); + // Conditions of -cl-fast-relaxed-math causing accuracy issues can be traced from COMPMID-5324 + const auto act_function = act_info.activation(); + const auto dst_data_type = dst->data_type(); + + if((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) + && (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (dst_data_type == DataType::F32 || dst_data_type == DataType::F16)) + { + // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations + // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations + build_options.add_option("-cl-unsafe-math-optimizations"); + } + else + { + build_options.add_option("-cl-fast-relaxed-math"); + } + build_options.add_option("-DSRC_TENSOR_TYPE=BUFFER"); build_options.add_option("-DSRC_DATA_TYPE=" + get_cl_type_from_data_type(src->data_type())); build_options.add_option("-DDST_TENSOR_TYPE=BUFFER"); - build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst->data_type())); + build_options.add_option("-DDST_DATA_TYPE=" + get_cl_type_from_data_type(dst_data_type)); build_options.add_option_if_else(export_to_cl_image, "-DWEI_TENSOR_TYPE=IMAGE", "-DWEI_TENSOR_TYPE=BUFFER"); build_options.add_option("-DWEI_WIDTH=" + support::cpp11::to_string(weights->dimension(width_idx))); build_options.add_option("-DWEI_HEIGHT=" + support::cpp11::to_string(weights->dimension(height_idx))); @@ -271,7 +287,7 @@ void ClDirectConv2dKernel::configure(const CLCompileContext &compile_context, IT build_options.add_option("-DK0=" + support::cpp11::to_string(k0)); build_options.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); build_options.add_option_if((src->dimension(channel_idx) % k0) != 0, "-DLEFTOVER_LOOP"); - build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_info.activation()))); + build_options.add_option("-DACTIVATION_TYPE=" + lower_string(string_from_activation_func(act_function))); if(is_data_type_quantized(data_type)) { diff --git a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp index a0735b1112..79f425189a 100644 --- a/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp +++ b/src/gpu/cl/kernels/ClDirectConv3dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021 Arm Limited. + * Copyright (c) 2021-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,6 +42,8 @@ Status validate_arguments(const ITensorInfo *src0, const ITensorInfo *src1, cons { ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(src0, src1, dst); ARM_COMPUTE_RETURN_ERROR_ON_MSG(src0->data_layout() != DataLayout::NDHWC, "Only NDHWC layout supported"); + + // When fusing activation, same workaround introduced for COMPMID-5324 may be necessary ARM_COMPUTE_RETURN_ERROR_ON_MSG(conv3d_info.act_info.enabled(), "Fused activation not supported"); ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src0); -- cgit v1.2.1