From 6b6a16faa9375365d444b2a3998381b22cd6cd5b Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Thu, 28 May 2020 08:55:51 +0100 Subject: COMPMID-3501 Modify heuristics for f16+fastmath NEON Winograd Conv * Disable winograd on certain layers of squeezenet v1.1 * Fix winograd validate_kernel_3x3 Signed-off-by: SiCong Li Change-Id: I380c6e4a0f8338056839df3c8810f726227f210f Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3348 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 33 ++++++++++++++++++++++ .../NEON/functions/NEWinogradConvolutionLayer.cpp | 2 +- 2 files changed, 34 insertions(+), 1 deletion(-) diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index 4a779917a7..62eabb2d61 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -181,6 +181,39 @@ ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo * { return ConvolutionMethod::GEMM; } + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + // This heuristics only applies to F16 data type on A55r1 + if(NEScheduler::get().cpu_info().get_cpu_model() == CPUModel::A55r1 && enable_fast_math && input->data_type() == DataType::F16) + { + // Exclude known bad winograd configs (and defaults to GEMM) + const std::vector<ConvolutionConfiguration> known_bad_winograd_f16_with_fastmath_configs = + { + // Squeezenet_V1_1 fire2 and fire3 + ConvolutionConfiguration(Size2D(56U, 56U), Size2D(3U, 3U), Size2D(16U, 64U), PadStrideInfo(1U, 1U, 1U, 1U)), + // Squeezenet_V1_1 fire6 and fire7 + ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(48U, 192U), PadStrideInfo(1U, 1U, 1U, 1U)), + // Squeezenet_V1_1 fire8 and fire9 + ConvolutionConfiguration(Size2D(14U, 14U), Size2D(3U, 3U), Size2D(64U, 256U), PadStrideInfo(1U, 1U, 1U, 1U)), + }; + const auto find_conv_config = 
[&](ConvolutionConfiguration c) + { + const PadStrideInfo info = std::get<3>(c); + + return std::get<0>(c) == Size2D(input->dimension(idx_w), input->dimension(idx_h)) && std::get<1>(c) == Size2D(weights->dimension(idx_w), weights->dimension(idx_h)) + && std::get<2>(c) == Size2D(weights->dimension(idx_c), weights->dimension(3)) && info.pad_top() == conv_info.pad_top() && info.pad_right() == conv_info.pad_right() + && info.pad_bottom() == conv_info.pad_bottom() && info.pad_left() == conv_info.pad_left() && info.stride() == conv_info.stride(); + }; + + bool found_bad = std::find_if(known_bad_winograd_f16_with_fastmath_configs.begin(), known_bad_winograd_f16_with_fastmath_configs.end(), + find_conv_config) + != known_bad_winograd_f16_with_fastmath_configs.end(); + if(found_bad) + { + return ConvolutionMethod::GEMM; + } + } +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC return bool(NEWinogradConvolutionLayer::validate(input, weights, nullptr, output, conv_info, act_info, enable_fast_math)) ? ConvolutionMethod::WINOGRAD : ConvolutionMethod::GEMM; } } diff --git a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp index a74e710c62..88d8a7573f 100644 --- a/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp @@ -62,7 +62,7 @@ inline Status validate_kernel_3x3(const Size2D input_dims, const ITensorInfo *in } } #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - else if(input->data_type() == DataType::F32) + else if(input->data_type() == DataType::F16) { ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformInputKernel<__fp16, 4, 4, 3, 3>::validate(input, input0, winograd_info))); ARM_COMPUTE_RETURN_ON_ERROR((NEWinogradLayerTransformWeightsKernel<__fp16, 4, 4, 3, 3>::validate(weights, input1, winograd_info))); -- cgit v1.2.1