diff options
author | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-08-03 16:39:23 +0100 |
---|---|---|
committer | Viet-Hoa Do <viet-hoa.do@arm.com> | 2022-08-08 14:58:58 +0000 |
commit | 65c8db87a2f442cc17ef90d05406e705ca7a9c1e (patch) | |
tree | 6ff8df6955114af93eca811cdd0d0c6bca4ebe7e /src/core | |
parent | 992a741599647d07e6c5b852d9978e9d32f30992 (diff) | |
download | ComputeLibrary-65c8db87a2f442cc17ef90d05406e705ca7a9c1e.tar.gz |
Fix for AI benchmark ResNet regression
* For 3x3 kernels, only choose the implementation with the larger tile
size if the input tensor is larger than that tile.
Resolves: COMPMID-5467
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I2cf95ddb25f477cb05da3b3501e0afe9548fc33a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8022
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/convolution/winograd/winograd_implementations.hpp | 11 |
2 files changed, 11 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp index 87ad4b2437..73abe8b945 100644 --- a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp +++ b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp @@ -45,7 +45,7 @@ void arm_fp32_1x2_1x7(unsigned int, const float *, size_t, const float *, float static const TransformImplementation<float> transforms_fp32[] = { #if defined(__aarch64__) #endif // defined(__aarch64__) - { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded) }, + { IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded), MethodConstraints::LargerShape }, { IMPL(2, 2, 3, 3, arm_fp32_2x2_3x3, Unpadded) }, { IMPL(2, 2, 5, 5, arm_fp32_2x2_5x5, Unpadded) }, { IMPL(1, 6, 1, 3, arm_fp32_1x6_1x3, Unpadded) }, diff --git a/src/core/NEON/kernels/convolution/winograd/winograd_implementations.hpp b/src/core/NEON/kernels/convolution/winograd/winograd_implementations.hpp index a23cb1d6b3..510f69baaa 100644 --- a/src/core/NEON/kernels/convolution/winograd/winograd_implementations.hpp +++ b/src/core/NEON/kernels/convolution/winograd/winograd_implementations.hpp @@ -38,6 +38,7 @@ enum class MethodConstraints RequiresSVE2 = 0x2, RequiresSME = 0x4, RequiresSME2 = 0x8, + LargerShape = 0x10, // Input tensor shape is larger than the output transform tile shape. 
}; constexpr inline bool operator!(const MethodConstraints &c) @@ -66,6 +67,14 @@ inline bool constraints_met(const MethodConstraints &c, const CPUInfo *ci, const ); } +inline bool output_transform_constraints_met(const output_transform::ITransform *transform, const MethodConstraints &c, const CPUInfo *ci, const ConvolutionArgs &conv_args, const WinogradConfig *cfg) +{ + return ( + constraints_met(c, ci, conv_args, cfg) && + (!(c & MethodConstraints::LargerShape) || (conv_args.input_shape.rows > transform->get_output_rows() && conv_args.input_shape.cols > transform->get_output_cols())) + ); +} + namespace weight_transform { template <typename TIn, typename TOut=TIn> @@ -209,7 +218,7 @@ inline std::vector<const output_transform::ITransform *> get_output_transforms( impl->transform.get() != nullptr; impl++) { if( - constraints_met(impl->constraints, ci, conv_args, cfg) && + output_transform_constraints_met(impl->transform.get(), impl->constraints, ci, conv_args, cfg) && impl->transform->get_kernel_rows() == conv_args.kernel_shape.rows && impl->transform->get_kernel_cols() == conv_args.kernel_shape.cols && (cfg->output_rows == 0 || cfg->output_rows == impl->transform->get_output_rows()) && |