From 1434155d8d937c5a07af67439f8241064f841ad8 Mon Sep 17 00:00:00 2001
From: Sangwon Ha <sangwon.ha@arm.com>
Date: Wed, 4 Oct 2023 00:35:22 +0100
Subject: Change heuristics for FP16 Deconv

- For FP16, disable direct Deconv method when output channel count is
  greater than 32.

Resolves COMPMID-6311

Change-Id: I14d9dbf1a1b95736ccd09488d633df4775a01dcb
Signed-off-by: Sangwon Ha <sangwon.ha@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10446
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'src/runtime')

diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index e988ab0ac4..4e0d1501ba 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -178,9 +178,9 @@ DeconvolutionMethod CLDeconvolutionLayer::get_deconvolution_method(const ITensor
     if (weights->dimension(idx_w) != deconv_info.stride().first ||
         weights->dimension(idx_h) != deconv_info.stride().second)
     {
-        // We observe better performance for FP32 types only when ofm <= 16.
-        // A better heuristic is required for selecting the method for FP16 data types.
-        if (input->data_layout() == DataLayout::NHWC && !((input->data_type() == DataType::F32) && (ofm > 16)))
+        // We observe better performance for FP32 types only when ofm <= 16, and for FP16 only when ofm <= 32.
+        if (input->data_layout() == DataLayout::NHWC && !((input->data_type() == DataType::F32) && (ofm > 16)) &&
+            !((input->data_type() == DataType::F16) && (ofm > 32)))
         {
             return DeconvolutionMethod::DIRECT;
         }
-- 
cgit v1.2.1