From 538a076aae14dbc1940c52057e135f8a1872aa11 Mon Sep 17 00:00:00 2001
From: Giorgio Arena <giorgio.arena@arm.com>
Date: Fri, 14 May 2021 16:38:43 +0100
Subject: Fix oclgrind error on CLGEMMLowp reshaped only RHS quantized per
 channel

Fix the corner case in which the quantization is per channel and OFM == 1.
The function can safely set the per_channel quantization flag to false, since there is only one output channel.
This way, the kernel avoids adding useless padding to the output multipliers and shifts.

Resolve COMPMID-4384

Change-Id: Ic03452bfaf52d1be536cd371721adedd2e580a08
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5648
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
---
 src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 6 ++++++
 1 file changed, 6 insertions(+)

(limited to 'src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp')

diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index b189955c04..5a9ff7990f 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -344,6 +344,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
 
         GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
         gemmlowp_output_stage.output_data_type        = _matrix_a->info()->data_type();
+        if(num_filters == 1)
+        {
+            // Per-channel quantization with OFM == 1 is equivalent to uniform quantization.
+            // Setting this flag to false prevents the kernel from adding useless padding to the output multipliers and shifts
+            gemmlowp_output_stage.is_quantized_per_channel = false;
+        }
 
         gemm_kernel_info.output_stage = gemmlowp_output_stage;
 
-- 
cgit v1.2.1