aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2021-05-14 16:38:43 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-05-17 09:05:30 +0000
commit 538a076aae14dbc1940c52057e135f8a1872aa11 (patch)
tree 3eadacd0beb21d39eed87c18d4f63f255c785ae2
parent ed4b8a07e67c7802207c8954a88ad7a91aec79e0 (diff)
download ComputeLibrary-538a076aae14dbc1940c52057e135f8a1872aa11.tar.gz
Fix oclgrind error on CLGEMMLowp reshaped only RHS quantized per channel
Fix corner case in which the quantization is per channel and OFM == 1.
The function can safely set the per_channel quantization flag to false since there is only one output channel.
This way, the kernel will avoid adding useless padding to the output multipliers and shifts.
Resolve COMPMID-4384
Change-Id: Ic03452bfaf52d1be536cd371721adedd2e580a08
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5648
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r-- src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp | 2
-rw-r--r-- src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp | 6
2 files changed, 7 insertions, 1 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
index d39900a561..37c11000db 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.cpp
@@ -256,7 +256,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input0, ITe
window_changed = window_changed || update_window_and_padding(win_out, bias_access);
}
- if(output_multipliers != nullptr && output_multipliers->dimension(0) > 1)
+ if(output_multipliers != nullptr && output_stage.is_quantized_per_channel)
{
AccessWindowHorizontal output_multipliers_access(output_multipliers, 0, num_elems_processed_per_iteration_x);
AccessWindowHorizontal output_shifts_access(output_shifts, 0, num_elems_processed_per_iteration_x);
diff --git a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
index b189955c04..5a9ff7990f 100644
--- a/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
+++ b/src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp
@@ -344,6 +344,12 @@ void CLGEMMLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
GEMMLowpOutputStageInfo gemmlowp_output_stage = gemm_info.gemmlowp_output_stage();
gemmlowp_output_stage.output_data_type = _matrix_a->info()->data_type();
+ if(num_filters == 1)
+ {
+ // Per-channel quantization with OFM == 1 is equivalent to uniform quantization.
+ // Setting this flag to false prevents the kernel from adding useless padding to the output multipliers and shifts
+ gemmlowp_output_stage.is_quantized_per_channel = false;
+ }
gemm_kernel_info.output_stage = gemmlowp_output_stage;