From 56fabbae2309856f74151c0bc909d15d84951a2c Mon Sep 17 00:00:00 2001
From: Jakub Sujak
Date: Thu, 27 Apr 2023 09:24:05 +0100
Subject: Fix performance regression in FP16 Deconvolution

The previous heuristic for selecting the Deconvolution method with FP32 input data
introduced a performance regression for FP16. A simple fix ensures the previous
heuristic applies to FP32 types only.

Resolves: COMPMID-6027

Change-Id: I77ca6c9c72534057a3967db58924a972b0efb09f
Signed-off-by: Jakub Sujak
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9616
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Viet-Hoa Do
Comments-Addressed: Arm Jenkins
---
 src/runtime/CL/functions/CLDeconvolutionLayer.cpp | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/runtime/CL/functions')

diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
index 5c25cbafaf..4421a18f2a 100644
--- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
+++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h"
 
+#include "arm_compute/core/Types.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
@@ -155,7 +156,9 @@ DeconvolutionMethod CLDeconvolutionLayer::get_deconvolution_method(const ITensor
 
     if(weights->dimension(idx_w) != deconv_info.stride().first || weights->dimension(idx_h) != deconv_info.stride().second)
     {
-        if(input->data_layout() == DataLayout::NHWC && ofm <= 16)
+        // We observe better performance for FP32 types only when ofm <= 16.
+        // A better heuristic is required for selecting the method for FP16 data types.
+        if(input->data_layout() == DataLayout::NHWC && !((input->data_type() == DataType::F32) && (ofm > 16)))
         {
             return DeconvolutionMethod::DIRECT;
         }
-- 
cgit v1.2.1