Disable unsafe FP optimizations in Winograd Output Transform

The unsafe FP optimizations flag causes accuracy issues under certain combinations of hardware type, data type and activation function.

Resolves: COMPMID-5375
Change-Id: I1b0b06549b8c108617962d006a20dd263d5e3c21
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8061
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
author: Gunes Bayir <gunes.bayir@arm.com> 2022-08-10 15:58:51 +0100
committer: Ramy Elgammal <ramy.elgammal@arm.com> 2022-08-11 12:36:34 +0100
commit: c4fe78527c12910d380d8eb2e5e4ed6a52df3a65 (patch)
tree: bd02ca981bb6a3d92c134d96187757d70a812128
parent: dde3756a07ee5c65ac96b4f6b3cd77d54470abb6 (diff)
download: ComputeLibrary-c4fe78527c12910d380d8eb2e5e4ed6a52df3a65.tar.gz
2 files changed, 26 insertions, 9 deletions
diff --git a/src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp b/src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
index 632fd2636f..9eb249a66a 100644
--- a/src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
+++ b/src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp
@@ -178,14 +178,31 @@ void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_
 
     _num_tiles_x = num_tiles.width;
 
+    // Conditions of -cl-fast-relaxed-math causing accuracy issues can be traced from COMPMID-5324
+    const GPUTarget gpu_target    = get_target();
+    const auto      act_function  = act_info.activation();
+    const auto      src_data_type = src->data_type();
+
+    if((gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
+       && (act_function == ActivationLayerInfo::ActivationFunction::BOUNDED_RELU || act_function == ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU)
+       && (src_data_type == DataType::F32 || src_data_type == DataType::F16))
+    {
+        // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations
+        // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations
+        build_opts.add_option("-cl-unsafe-math-optimizations");
+    }
+    else
+    {
+        build_opts.add_option("-cl-fast-relaxed-math");
+    }
+
     if(_is_nhwc)
     {
         build_opts.add_option_if(bias != nullptr, std::string("-DHAS_BIAS"));
-        build_opts.add_option("-cl-fast-relaxed-math");
         build_opts.add_option("-DN0=" + support::cpp11::to_string(win_config.second.x().step()));
         build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width));
         build_opts.add_option("-DOUTPUT_TILE_H=" + support::cpp11::to_string(output_tile_size.height));
-        build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+        build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src_data_type));
         build_opts.add_option_if(total_batches > 1, "-DSRC_DEPTH=" + support::cpp11::to_string(src->dimension(2)));
         build_opts.add_option_if(winograd_info.kernel_size.height == 1, "-DWINOGRAD_OUTPUT_TRANSFORM_HORIZONTAL");
         build_opts.add_option_if(winograd_info.kernel_size.width == 1, "-DWINOGRAD_OUTPUT_TRANSFORM_VERTICAL");
@@ -194,12 +211,11 @@ void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_
     else
     {
         build_opts.add_option_if(bias != nullptr, std::string("-DHAS_BIAS"));
-        build_opts.add_option("-cl-fast-relaxed-math");
         build_opts.add_option("-DN0=" + support::cpp11::to_string(win_config.second.x().step()));
         build_opts.add_option("-DNUM_TILES_X=" + support::cpp11::to_string(num_tiles.width));
         build_opts.add_option("-DOUTPUT_TILE_W=" + support::cpp11::to_string(output_tile_size.width));
         build_opts.add_option("-DOUTPUT_TILE_H=" + support::cpp11::to_string(output_tile_size.height));
-        build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src->data_type()));
+        build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(src_data_type));
         build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(src->dimension(1)));
         build_opts.add_option("-DDST_WIDTH=" + support::cpp11::to_string(dst->dimension(idx_width)));
         build_opts.add_option("-DDST_HEIGHT=" + support::cpp11::to_string(dst->dimension(idx_height)));
@@ -209,9 +225,9 @@ void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_
     }
 
     // Storing tensor dimensions to be sent later as kernel arguments
-    _src_height  = src->dimension(1);
-    _dst_width   = dst->dimension(idx_width);
-    _dst_height  = dst->dimension(idx_height);
+    _src_height = src->dimension(1);
+    _dst_width  = dst->dimension(idx_width);
+    _dst_height = dst->dimension(idx_height);
 
     // Create kernel
     std::string kernel_name = "winograd_output_transform_" + output_tile_size.to_string() + "_" + kernel_size.to_string() + "_" + lower_string(string_from_data_layout(winograd_info.output_data_layout));
@@ -223,7 +239,7 @@ void ClWinogradOutputTransformKernel::configure(const ClCompileContext &compile_
     // Set config_id for enabling LWS tuning
     _config_id = kernel_name;
     _config_id += "_";
-    _config_id += lower_string(string_from_data_type(src->data_type()));
+    _config_id += lower_string(string_from_data_type(src_data_type));
     _config_id += "_";
     _config_id += support::cpp11::to_string(src->dimension(0));
     _config_id += "_";
diff --git a/src/gpu/cl/operators/ClWinogradConv2d.cpp b/src/gpu/cl/operators/ClWinogradConv2d.cpp
index ffa1effc74..b4163a5986 100644
--- a/src/gpu/cl/operators/ClWinogradConv2d.cpp
+++ b/src/gpu/cl/operators/ClWinogradConv2d.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -214,6 +214,7 @@ void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITenso
                                                                                                                   (src->data_type() == DataType::F16)));
 
     // Configure output transform
+    _output_transform->set_target(CLScheduler::get().target());
     _output_transform->configure(compile_context, &_batched_mm_output, biases, dst, winograd_info, act_info);
 
     _aux_mem                             = _batched_mm.workspace();
author	Gunes Bayir <gunes.bayir@arm.com>	2022-08-10 15:58:51 +0100
committer	Ramy Elgammal <ramy.elgammal@arm.com>	2022-08-11 12:36:34 +0100
commit	c4fe78527c12910d380d8eb2e5e4ed6a52df3a65 (patch)
tree	bd02ca981bb6a3d92c134d96187757d70a812128
parent	dde3756a07ee5c65ac96b4f6b3cd77d54470abb6 (diff)
download	ComputeLibrary-c4fe78527c12910d380d8eb2e5e4ed6a52df3a65.tar.gz