From 38d93bdda73f9b1024c6b4b834b382f7f25aae19 Mon Sep 17 00:00:00 2001
From: Vidhya Sudhan Loganathan
Date: Tue, 20 Nov 2018 15:38:13 +0000
Subject: COMPMID-1801 : (Nightly) CLWinogradConvolutionLayer FP16 mismatches

FP mixed precision support added to the GEMM kernel used for FP16 Winograd
convolution on Midgard GPUs.

Change-Id: I1619beb025fc484a1ac9d3e528d785edabbc7ee6
---
 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index b549638343..c9ed7763da 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -292,6 +292,11 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
         else
         {
             kernel_name = "gemm_mm_interleaved_transposed_" + lower_string(string_from_data_type(data_type));
+            if(fp_mixed_precision && data_type == DataType::F16)
+            {
+                // currently wider accumulator is only supported for fp16 kernels.
+                kernel_name += "_acc32";
+            }
         }
     }
     else // The input tensors have not been reshaped
@@ -307,6 +312,11 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
             if(input0->info()->num_dimensions() != 1)
             {
                 kernel_name += "_" + lower_string(string_from_data_type(data_type)) + "_bifrost";
+                if(fp_mixed_precision && data_type == DataType::F16)
+                {
+                    // currently wider accumulator is only supported for fp16 kernels.
+                    kernel_name += "_acc32";
+                }
             }
             else if(input1->info()->dimension(0) <= 1000 && data_type == DataType::F32)
             {
@@ -319,11 +329,6 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
                 // The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels
                 // via exhaustive autotuning over a range of representative layer configurations.
                 set_lws_hint(cl::NDRange(4));
-                if(fp_mixed_precision && data_type == DataType::F16)
-                {
-                    // currently wider accumulator is only supported for fp16 kernels.
-                    kernel_name += "_acc32";
-                }
             }
             else // (MIDGARD and F32) or (F16)
             {
--
cgit v1.2.1
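
For reference, a minimal standalone sketch of the kernel-name selection the
patch converges on. This is an illustration under assumptions, not the
Compute Library API: DataType, string_from_data_type and select_kernel_name
below are hypothetical stand-ins, and only the two code paths the patch
touches are modeled.

    #include <iostream>
    #include <string>

    // Hypothetical stand-ins for the Compute Library types involved; the
    // real names are resolved inside CLGEMMMatrixMultiplyKernel::configure().
    enum class DataType { F16, F32 };

    std::string string_from_data_type(DataType dt)
    {
        return dt == DataType::F16 ? "f16" : "f32";
    }

    // Models the two paths this patch changes: reshaped (interleaved and
    // transposed) inputs, and the Bifrost path for non-reshaped inputs. On
    // both, the "_acc32" suffix selects a kernel variant that accumulates in
    // fp32, and is appended only for fp16 kernels when mixed precision is
    // requested.
    std::string select_kernel_name(bool is_interleaved_transposed, DataType data_type, bool fp_mixed_precision)
    {
        std::string kernel_name;
        if(is_interleaved_transposed)
        {
            kernel_name = "gemm_mm_interleaved_transposed_" + string_from_data_type(data_type);
        }
        else
        {
            kernel_name = "gemm_mm_" + string_from_data_type(data_type) + "_bifrost";
        }
        if(fp_mixed_precision && data_type == DataType::F16)
        {
            kernel_name += "_acc32";
        }
        return kernel_name;
    }

    int main()
    {
        // FP16 Winograd convolution with reshaped inputs plus mixed
        // precision resolves to the fp32-accumulating variant:
        std::cout << select_kernel_name(true, DataType::F16, true) << "\n";
        // prints: gemm_mm_interleaved_transposed_f16_acc32
    }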