From 65c8db87a2f442cc17ef90d05406e705ca7a9c1e Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Wed, 3 Aug 2022 16:39:23 +0100
Subject: Fix for AI benchmark ResNet regression

* For 3x3 kernel, only choose the implementation with larger tile size if the input tensor is larger than the tile.

Resolves: COMPMID-5467
Signed-off-by: Viet-Hoa Do
Change-Id: I2cf95ddb25f477cb05da3b3501e0afe9548fc33a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8022
Tested-by: Arm Jenkins
Reviewed-by: Gian Marco Iodice
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp')

diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
index 87ad4b2437..73abe8b945 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
@@ -45,7 +45,7 @@ void arm_fp32_1x2_1x7(unsigned int, const float *, size_t, const float *, float
 static const TransformImplementation transforms_fp32[] = {
 #if defined(__aarch64__)
 #endif // defined(__aarch64__)
-  { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded) },
+  { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded), MethodConstraints::LargerShape },
   { IMPL(2, 2, 3, 3, arm_fp32_2x2_3x3, Unpadded) },
   { IMPL(2, 2, 5, 5, arm_fp32_2x2_5x5, Unpadded) },
   { IMPL(1, 6, 1, 3, arm_fp32_1x6_1x3, Unpadded) },
--
cgit v1.2.1