aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
diff options
context:
space:
mode:
authorVidhya Sudhan Loganathan <vidhyasudhan.loganathan@arm.com>2018-11-20 15:38:13 +0000
committerAnthony Barbier <Anthony.barbier@arm.com>2018-11-20 17:40:03 +0000
commit38d93bdda73f9b1024c6b4b834b382f7f25aae19 (patch)
treed2372cd8f6aafabaee500beadfab969516b1c524 /src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
parent8f2cbfa15bfb0e49ca6a334a220f0e36964289d6 (diff)
downloadComputeLibrary-38d93bdda73f9b1024c6b4b834b382f7f25aae19.tar.gz
COMPMID-1801 : (Nightly) CLWinogradConvolutionLayer FP16 mismatches
FP mixed precision support added to the GEMM kernel used for FP16 Winograd convolution on Midgard GPUs. Change-Id: I1619beb025fc484a1ac9d3e528d785edabbc7ee6
Diffstat (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp15
1 file changed, 10 insertions, 5 deletions
diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
index b549638343..c9ed7763da 100644
--- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.cpp
@@ -292,6 +292,11 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
else
{
kernel_name = "gemm_mm_interleaved_transposed_" + lower_string(string_from_data_type(data_type));
+ if(fp_mixed_precision && data_type == DataType::F16)
+ {
+ // currently wider accumulator is only supported for fp16 kernels.
+ kernel_name += "_acc32";
+ }
}
}
else // The input tensors have not been reshaped
@@ -307,6 +312,11 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
if(input0->info()->num_dimensions() != 1)
{
kernel_name += "_" + lower_string(string_from_data_type(data_type)) + "_bifrost";
+ if(fp_mixed_precision && data_type == DataType::F16)
+ {
+ // currently wider accumulator is only supported for fp16 kernels.
+ kernel_name += "_acc32";
+ }
}
else if(input1->info()->dimension(0) <= 1000 && data_type == DataType::F32)
{
@@ -319,11 +329,6 @@ void CLGEMMMatrixMultiplyKernel::configure(const ICLTensor *input0, const ICLTen
// The work-group size equal to the Bifrost quad size has been proved to be optimal for these kernels
// via exhaustive autotuning over a range of representative layer configurations.
set_lws_hint(cl::NDRange(4));
- if(fp_mixed_precision && data_type == DataType::F16)
- {
- // currently wider accumulator is only supported for fp16 kernels.
- kernel_name += "_acc32";
- }
}
else // (MIDGARD and F32) or (F16)
{