aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
diff options
context:
space:
mode:
author: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-08-03 16:39:23 +0100
committer: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-08-08 14:58:58 +0000
commit: 65c8db87a2f442cc17ef90d05406e705ca7a9c1e (patch)
tree: 6ff8df6955114af93eca811cdd0d0c6bca4ebe7e /src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
parent: 992a741599647d07e6c5b852d9978e9d32f30992 (diff)
download: ComputeLibrary-65c8db87a2f442cc17ef90d05406e705ca7a9c1e.tar.gz
Fix for AI benchmark ResNet regression
* For the 3x3 kernel, only choose the implementation with the larger tile size if the input tensor is larger than the tile.

Resolves: COMPMID-5467
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I2cf95ddb25f477cb05da3b3501e0afe9548fc33a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8022
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp')
-rw-r--r-- src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
index 87ad4b2437..73abe8b945 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
@@ -45,7 +45,7 @@ void arm_fp32_1x2_1x7(unsigned int, const float *, size_t, const float *, float
static const TransformImplementation<float> transforms_fp32[] = {
#if defined(__aarch64__)
#endif // defined(__aarch64__)
- { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded) },
+ { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded), MethodConstraints::LargerShape },
{ IMPL(2, 2, 3, 3, arm_fp32_2x2_3x3, Unpadded) },
{ IMPL(2, 2, 5, 5, arm_fp32_2x2_5x5, Unpadded) },
{ IMPL(1, 6, 1, 3, arm_fp32_1x6_1x3, Unpadded) },